

/*************************************************************************
**********************************(1)************************************* 
Reading raw data
***************************************************************************/

foreach year in "2005"  "2006" "2007" "2008"  "2009"  "2010" "2011"{
foreach type in  "01`year'" "02`year'" "03`year'" "04`year'" "05`year'" "06`year'" "07`year'" "08`year'"  "09`year'" "10`year'" "11`year'" "12`year'"{
clear

insheet using "${rawdata}501_`type'.txt", delimiter(|)

rename	v1  pataduanal
rename	v2	numpmento
rename	v3	cvesecadua
rename	v4	cvetipooperacion
rename	v5	cveddocumento
rename	v6	cveseccionaduanera
rename	v7	curpcontribuyente
rename	v8	rfc
rename	v9	curpagenteaduanal
rename	v10	tipodecambio
rename	v11	totalfletes
rename	v12	totalseguros
rename	v13	totaldembalajes
rename	v14	totalotrosincrementables
rename	v15	totalotrosdeducibles
rename	v16	pesobrutodelamercancia
rename	v17	cvetransportesalida
rename	v18	cvetransportedarribo
rename	v19	cvetranporteentrasaldelpais
rename	v20	cvedestinodmercancia
rename	v21	nombrecontribuyente
rename	v22	calledomcontribuyente
rename	v23	numintdomcontribuyente
rename	v24	numextdomcontribuyente
rename	v25	cpdomcontribuyebte
rename	v26	municioiociudadcontribuyente
rename	v27	cveentidadfedcontribuyente
rename	v28	cveopaiscontribuyente
rename	v29	fechadebalanza
rename	v30	banderaddespacho
sort pataduanal numpmento cvesecadua
save "${temp}501-`type'use", replace
}
}


foreach year in "2005" "2006" "2007"  "2008"  "2009"  "2010" "2011" {
foreach type in  "01`year'" "02`year'" "03`year'" "04`year'" "05`year'" "06`year'" "07`year'" "08`year'"  "09`year'" "10`year'" "11`year'" "12`year'"{
clear
insheet using "${rawdata}505_`type'.txt", delimiter(|)
rename	v1	pataduanal
rename	v2	numpmento
rename	v3	cvesecadua
rename	v4	fechafacturacion
rename	v5	numerodefactura
rename	v6	cveterminofacturacion
rename	v7	cvemonedafacturacion
rename	v8	valordolares
rename	v9	valorenmonedaextranjera
rename	v10	cvepaisfacturacion
rename	v11	cveentidadfederativafacturacion
rename	v12	idfisicadelproveedor
rename	v13	proveedordmarcancia
rename	v14	calledomproveedor
rename	v15	cumintproveedor
rename	v16	numextproveedor
rename	v17	cpdomproeedor
rename	v18	municipiociudaddomproveedor
rename	v19	fecbalanza
keep  pataduanal numpmento cvesecadua cveterminofacturacion valordolares idfisicadelproveedor proveedordmarcancia cvepaisfacturacion calledomproveedor cumintproveedor numextproveedor cpdomproeedor municipiociudaddomproveedor
sort pataduanal numpmento cvesecadua 
save "${temp}505-`type'use", replace
}
}



foreach year in "2005"  "2006" "2007" "2008"  "2009"  "2010" "2011" {
foreach type in  "01`year'" "02`year'" "03`year'" "04`year'" "05`year'" "06`year'" "07`year'" */ "08`year'"  "09`year'" "10`year'" "11`year'" "12`year'"{
clear
insheet using "${rawdata}551_`type'.txt", delimiter(|)

rename	v1	pataduanal
rename	v2	numpmento
rename	v3	cvesecadua
rename	v4	fraccionarancelaria
rename	v5	secuencia_fraccionarancelaria
rename	v6	division_fraccionarancelaria
rename	v7	descripcion_mercancia
rename	v8	preciounitario
rename	v9	valorenaduana
rename	v10	valorcomercial
rename	v11	valorendolares
rename	v12	cantunidadmedida
rename	v13	cveunidadmedidacomercial
rename	v14	cantomercanciaunidadesmedida
rename	v15	cve_dudmd_tarifa
rename	v16	valoragregado
rename	v17	cvedvinculacion
rename	v18	cve_metodo_valorizacion
rename	v19	codigo_mercancia_o_productos
rename	v20	marca_mercancia_producto
rename	v21	modelo_mercancia_o_producto
rename	v22	cvepaisorigendestino
rename	v23	cvepaiscompradorvendedor
rename	v24	cveentidadfederativaorigen
rename	v25	cveentidadfederativadestino
rename	v26	cveentidadfederativacomprador
rename	v27	cveentidadfederativavendedor
rename	v28	fecha_balanza
keep pataduanal numpmento cvesecadua fraccionarancelaria preciounitario valorenaduana valorcomercial valorendolares cantomercanciaunidadesmedida cveunidadmedidacomercial cvedvinculacion  cvepaisorigendestino cvepaiscompradorvendedor cveentidadfederativa***
sort pataduanal numpmento cvesecadua
drop if pataduanal==.
save "${temp}551-`type'use", replace
}
}


foreach year in "2004" {
foreach type in  "06`year'" "07`year'" "08`year'"  "09`year'" "10`year'" "11`year'" "12`year'"{
clear

insheet using "${rawdata}501_`type'.txt", delimiter(|)

rename	v1  pataduanal
rename	v2	numpmento
rename	v3	cvesecadua
rename	v4	cvetipooperacion
rename	v5	cveddocumento
rename	v6	cveseccionaduanera
rename	v7	curpcontribuyente
rename	v8	rfc
rename	v9	curpagenteaduanal
rename	v10	tipodecambio
rename	v11	totalfletes
rename	v12	totalseguros
rename	v13	totaldembalajes
rename	v14	totalotrosincrementables
rename	v15	totalotrosdeducibles
rename	v16	pesobrutodelamercancia
rename	v17	cvetransportesalida
rename	v18	cvetransportedarribo
rename	v19	cvetranporteentrasaldelpais
rename	v20	cvedestinodmercancia
rename	v21	nombrecontribuyente
rename	v22	calledomcontribuyente
rename	v23	numintdomcontribuyente
rename	v24	numextdomcontribuyente
rename	v25	cpdomcontribuyebte
rename	v26	municioiociudadcontribuyente
rename	v27	cveentidadfedcontribuyente
rename	v28	cveopaiscontribuyente
rename	v29	fechadebalanza
rename	v30	banderaddespacho
sort pataduanal numpmento cvesecadua
save "${temp}501-`type'use", replace

clear
insheet using "${rawdata}505_`type'.txt", delimiter(|)

rename	v1	pataduanal
rename	v2	numpmento
rename	v3	cvesecadua
rename	v4	fechafacturacion
rename	v5	numerodefactura
rename	v6	cveterminofacturacion
rename	v7	cvemonedafacturacion
rename	v8	valordolares
rename	v9	valorenmonedaextranjera
rename	v10	cvepaisfacturacion
rename	v11	cveentidadfederativafacturacion
rename	v12	idfisicadelproveedor
rename	v13	proveedordmarcancia
rename	v14	calledomproveedor
rename	v15	cumintproveedor
rename	v16	numextproveedor
rename	v17	cpdomproeedor
rename	v18	municipiociudaddomproveedor
rename	v19	fecbalanza


keep  pataduanal numpmento cvesecadua cveterminofacturacion valordolares idfisicadelproveedor proveedordmarcancia cvepaisfacturacion calledomproveedor cumintproveedor numextproveedor cpdomproeedor municipiociudaddomproveedor
sort pataduanal numpmento cvesecadua 
save "${temp}505-`type'use", replace

clear
insheet using "${rawdata}551_`type'.txt", delimiter(|)

rename	v1	pataduanal
rename	v2	numpmento
rename	v3	cvesecadua
rename	v4	fraccionarancelaria
rename	v5	secuencia_fraccionarancelaria
rename	v6	division_fraccionarancelaria
rename	v7	descripcion_mercancia
rename	v8	preciounitario
rename	v9	valorenaduana
rename	v10	valorcomercial
rename	v11	valorendolares
rename	v12	cantunidadmedida
rename	v13	cveunidadmedidacomercial
rename	v14	cantomercanciaunidadesmedida
rename	v15	cve_dudmd_tarifa
rename	v16	valoragregado
rename	v17	cvedvinculacion
rename	v18	cve_metodo_valorizacion
rename	v19	codigo_mercancia_o_productos
rename	v20	marca_mercancia_producto
rename	v21	modelo_mercancia_o_producto
rename	v22	cvepaisorigendestino
rename	v23	cvepaiscompradorvendedor
rename	v24	cveentidadfederativaorigen
rename	v25	cveentidadfederativadestino
rename	v26	cveentidadfederativacomprador
rename	v27	cveentidadfederativavendedor
rename	v28	fecha_balanza


keep pataduanal numpmento cvesecadua fraccionarancelaria preciounitario valorenaduana valorcomercial valorendolares cantomercanciaunidadesmedida cveunidadmedidacomercial cvedvinculacion  cvepaisorigendestino cvepaiscompradorvendedor cveentidadfederativa***
sort pataduanal numpmento cvesecadua
drop if pataduanal==.
save "${temp}551-`type'use", replace

}
}


/*************************************************************************
**********************************(2)************************************* 
Creating the list of transportation companies
***************************************************************************/

forvalues i=5/9 {
foreach type in "01200`i'" "02200`i'" "03200`i'" "04200`i'" "05200`i'" "06200`i'" "07200`i'" "08200`i'" "09200`i'" "10200`i'" "11200`i'" "12200`i'" {
clear 
insheet using "${rawdata}502_`type'.txt", delim("|")
rename v1 pataduanal
rename v2 numpmento
rename v3 cvesecadua
rename v4 rfctransta
rename v5 curptransp
rename v6 nomtransta
rename v7 cvpatransp
rename v8 iddeltrans
rename v9 fecbalanza 
save "${temp}502_`type'.dta", replace 
}
}

forvalues i=5/9 {
foreach type in "01200`i'" "02200`i'" "03200`i'" "04200`i'" "05200`i'" "06200`i'" "07200`i'" "08200`i'" "09200`i'" "10200`i'" "11200`i'" "12200`i'" {
append using "${temp}502_`type'.dta"
}
}

/*** create the list of unique RFC***/
keep rfctransta
drop if rfctransta==""
duplicates drop 

gen transport=1
sort rfctransta
save "${output_data}transportista_rfclist", replace

forvalues i=5/9 {
foreach type in "01200`i'" "02200`i'" "03200`i'" "04200`i'" "05200`i'" "06200`i'" "07200`i'" "08200`i'" "09200`i'" "10200`i'" "11200`i'" "12200`i'" {
erase "${temp}502_`type'.dta"
}
}


/*************************************************************************
**********************************(3)************************************* 
Connecting dataset for later use
***************************************************************************/


foreach yyy in  "2005" "2006" "2007" "2008" "2009" "2010" "2011"{
foreach type in "01`yyy'" "02`yyy'" "03`yyy'" "04`yyy'" "05`yyy'" "06`yyy'" "07`yyy'" "08`yyy'" "09`yyy'" "10`yyy'"  "11`yyy'" "12`yyy'"{
use "${temp}501-`type'use.dta", replace
keep if cvetipooperacion==2 
merge 1:m pataduanal numpmento cvesecadua using "${temp}505-`type'use"
tab _merge
keep if _merge==3
drop _merge
sort pataduanal numpmento cvesecadua
collapse (sum) valordolares, by (pataduanal numpmento cvesecadua idfisicadelproveedor proveedordmarcancia calledomproveedor cumintproveedor numextproveedor cpdomproeedor municipiociudaddomproveedor)
by pataduanal numpmento cvesecadua: egen tot=sum(valordolares)
gen share_dest=valordolares/tot
bysort pataduanal numpmento cvesecadua: gen nnn=_n
bysort pataduanal numpmento cvesecadua: egen np=max(nnn)
gen multi=(np!=1)
drop nnn np
save "${temp}505-`type'exp", replace
keep if multi==0
save "${temp}505-`type'exp_single", replace
}


foreach type in "01`yyy'" "02`yyy'" "03`yyy'" "04`yyy'" "05`yyy'" "06`yyy'" "07`yyy'" "08`yyy'" "09`yyy'"  "10`yyy'" "11`yyy'" "12`yyy'"{
use "${temp}501-`type'use.dta", replace
keep if cvetipooperacion==2
merge 1:m pataduanal numpmento cvesecadua using "${temp}551-`type'use"
tab _merge
keep if _merge==3
drop _merge
save "${temp}551-`type'exp", replace
}


foreach type in  "01`year'" "02`year'" "03`year'" "04`year'" "05`year'" "06`year'" "07`year'" "08`year'"  "09`year'" "10`year'" "11`year'" "12`year'"{
use 501-`type'use, clear 
keep if cvetipooperacion==2
keep if cveddocumento!=""
sort pataduanal	numpmento cvesecadua
duplicates drop 
merge 1:m pataduanal numpmento cvesecadua using "${temp}551-`type'use.dta"
keep if _merge==3
save "${temp}501_551_`type'use", replace 
}


foreach type in "01`yyy'" "02`yyy'" "03`yyy'" "04`yyy'" "05`yyy'" "06`yyy'" "07`yyy'" "08`yyy'" "09`yyy'"  "10`yyy'" "11`yyy'" "12`yyy'"{
use "${temp}551-`type'exp", replace
drop if valorendolares==0
gen small=(valorendolares<2000)
drop if cveddocumento==""
sort pataduanal numpmento cvesecadua
by pataduanal numpmento cvesecadua: egen tot=sum(valorendolares)
gen share_product=valorendolares/tot
joinby pataduanal numpmento cvesecadua using "${temp}505-`type'exp", unmatched(master)
gen imputevalor=valordolares*share_product
gen imputevalor2=valorendolares*share_dest
gen destination=proveedordmarcancia
gen iddest=""
replace iddest=idfisicadelproveedor
gen destfile=.
gen street=calledomproveedor
gen number_int=cumintproveedor
gen number_ext=numextproveedor
gen cp=cpdomproeedor
gen mun=municipiociudaddomproveedor
keep  pataduanal numpmento cvesecadua fraccionarancelaria valorcomercial valorenaduana preciounitario cveddocumento rfc cpdomcon destfile valordolares valorendolares  cvedvinculacion cvepaisorigendestino cvepaiscomp destination iddest street number_int number_ext cp mun imputevalor imputevalor2 share_dest share_product small multi
gen ntrans=1
gen mmyyyy="A"
replace mmyyyy="`type'"
gen month=substr(mmyyyy,1,2)
destring month, replace
gen year=substr(mmyyyy, 3,4)
destring year, replace
collapse (sum) ntrans valorendolares imputevalor imputevalor2, by (rfc cpd fraccionarancelaria  iddest destination cveddocumento cvepaisorigendestino cvepaiscomp destfile street number_int number_ext cp mun small month multi year)
save "${temp}destination-info-pair-`type'complete_multi2", replace
}
}


foreach yyy in "2004" {
foreach type in  "06`yyy'" "07`yyy'" "08`yyy'" "09`yyy'" "10`yyy'"  "11`yyy'" "12`yyy'"{
use "${temp}501-`type'use.dta", replace
keep if cvetipooperacion==2 
merge 1:m pataduanal numpmento cvesecadua using "${temp}505-`type'use"
tab _merge
keep if _merge==3
drop _merge
sort pataduanal numpmento cvesecadua
collapse (sum) valordolares, by (pataduanal numpmento cvesecadua idfisicadelproveedor proveedordmarcancia calledomproveedor cumintproveedor numextproveedor cpdomproeedor municipiociudaddomproveedor)
by pataduanal numpmento cvesecadua: egen tot=sum(valordolares)
gen share_dest=valordolares/tot
bysort pataduanal numpmento cvesecadua: gen nnn=_n
bysort pataduanal numpmento cvesecadua: egen np=max(nnn)
gen multi=(np!=1)
drop nnn np
save "${temp}505-`type'exp", replace
keep if multi==0
save "${temp}505-`type'exp_single", replace
}


foreach type in  "06`yyy'" "07`yyy'" "08`yyy'" "09`yyy'"  "10`yyy'" "11`yyy'" "12`yyy'"{
use "${temp}501-`type'use.dta", replace
keep if cvetipooperacion==2
merge 1:m pataduanal numpmento cvesecadua using "${temp}551-`type'use"
tab _merge
keep if _merge==3
drop _merge
save "${temp}551-`type'exp", replace
}


foreach type in  "06`yyy'" "07`yyy'" "08`yyy'" "09`yyy'"  "10`yyy'" "11`yyy'" "12`yyy'"{
use "${temp}551-`type'exp", replace
drop if valorendolares==0
gen small=(valorendolares<2000)
drop if cveddocumento==""
sort pataduanal numpmento cvesecadua
by pataduanal numpmento cvesecadua: egen tot=sum(valorendolares)
gen share_product=valorendolares/tot
joinby pataduanal numpmento cvesecadua using "${temp}505-`type'exp", unmatched(master)
gen imputevalor=valordolares*share_product
gen imputevalor2=valorendolares*share_dest
gen destination=proveedordmarcancia
gen iddest=""
replace iddest=idfisicadelproveedor
gen destfile=.
gen street=calledomproveedor
gen number_int=cumintproveedor
gen number_ext=numextproveedor
gen cp=cpdomproeedor
gen mun=municipiociudaddomproveedor
keep  pataduanal numpmento cvesecadua fraccionarancelaria valorcomercial valorenaduana preciounitario cveddocumento rfc cpdomcon destfile valordolares valorendolares  cvedvinculacion cvepaisorigendestino cvepaiscomp destination iddest street number_int number_ext cp mun imputevalor imputevalor2 share_dest share_product small multi
gen ntrans=1
gen mmyyyy="A"
replace mmyyyy="`type'"
gen month=substr(mmyyyy,1,2)
destring month, replace
gen year=substr(mmyyyy, 3,4)
destring year, replace
collapse (sum) ntrans valorendolares imputevalor imputevalor2, by (rfc cpd fraccionarancelaria  iddest destination  cveddocumento cvepaisorigendestino cvepaiscomp destfile street number_int number_ext cp mun small month multi year)
save "${temp}destination-info-pair-`type'complete_multi2", replace
}
}

**Data with price


foreach yyy in "2010" "2011" "2005" "2006" "2007" "2008" "2009" {
foreach type in "01`yyy'" "02`yyy'" "03`yyy'" "04`yyy'" "05`yyy'" "06`yyy'" "07`yyy'" "08`yyy'" "09`yyy'"  "10`yyy'" "11`yyy'" "12`yyy'"{
use "${temp}551-`type'exp", replace
drop if valorendolares==0
gen small=(valorendolares<2000)
drop if cveddocumento==""

sort pataduanal numpmento cvesecadua
by pataduanal numpmento cvesecadua: egen tot=sum(valorendolares)
gen share_product=valorendolares/tot
joinby pataduanal numpmento cvesecadua using "${temp}505-`type'exp", unmatched(master)
gen imputevalor=valordolares*share_product
gen imputevalor2=valorendolares*share_dest
gen destination=proveedordmarcancia
gen iddest=""
replace iddest=idfisicadelproveedor
gen destfile=.
gen street=calledomproveedor
gen number_int=cumintproveedor
gen number_ext=numextproveedor
gen cp=cpdomproeedor
gen mun=municipiociudaddomproveedor
gen ntrans=1
gen mmyyyy="A"
replace mmyyyy="`type'"
gen month=substr(mmyyyy,1,2)
destring month, replace
gen year=substr(mmyyyy, 3,4)
destring year, replace
keep pataduanal numpmento cvesecadua  rfc cpdomc fraccionarancelaria valorendolares imputevalor imputevalor2 canto cveunidad  preciounitario iddest destination cveddocumento cvepaisorigendestino cvepaiscomp destfile street number_int number_ext cp mun small month multi year
save "${temp}destination-info-pair-`type'complete_multi2price", replace
}
}


foreach yyy in "2004" {
foreach type in "06`yyy'" "07`yyy'" "08`yyy'" "09`yyy'"  "10`yyy'" "11`yyy'" "12`yyy'"{
use "${temp}551-`type'exp", replace
drop if valorendolares==0
gen small=(valorendolares<2000)
drop if cveddocumento==""
sort pataduanal numpmento cvesecadua
by pataduanal numpmento cvesecadua: egen tot=sum(valorendolares)
gen share_product=valorendolares/tot
joinby pataduanal numpmento cvesecadua using "${temp}505-`type'exp", unmatched(master)
gen imputevalor=valordolares*share_product
gen imputevalor2=valorendolares*share_dest
gen destination=proveedordmarcancia
gen iddest=""
replace iddest=idfisicadelproveedor
gen destfile=.
gen street=calledomproveedor
gen number_int=cumintproveedor
gen number_ext=numextproveedor
gen cp=cpdomproeedor
gen mun=municipiociudaddomproveedor
gen ntrans=1
gen mmyyyy="A"
replace mmyyyy="`type'"
gen month=substr(mmyyyy,1,2)
destring month, replace
gen year=substr(mmyyyy, 3,4)
destring year, replace
keep pataduanal numpmento cvesecadua  rfc cpdomc fraccionarancelaria valorendolares imputevalor imputevalor2 canto cveunidad preciounitario iddest destination cveddocumento cvepaisorigendestino cvepaiscomp destfile street number_int number_ext cp mun small month multi year
save "${temp}destination-info-pair-`type'complete_multi2price", replace
}
}

/*************************************************************************
**********************************(4)************************************* 
Cleaning
***************************************************************************/



capture program drop getDestinationHS
program define getDestinationHS
   {

/*** Assign information on whether a transaction is made by individual and by transportation company***/   

foreach type in  "2005" "2006" "2007"   "2008"   "2009" "2010" "2011"{
foreach mm in "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"{
use "${temp}destination-info-pair-`mm'`type'complete_multi2", replace
drop if multi==1
drop multi
keep if cvepaisorigendestino=="USA"
keep if cvepaiscomp=="USA"
gen frfc=substr(rfc, 4, 1)
destring frfc, replace force
gen firm=(frfc!=.)
drop frfc
sort rfc
rename rfc rfctransta 
merge m:1 rfctransta using "${output_data}transportista_rfclist.dta"
gen transportista=0 
replace transportista=1 if _merge==3
drop if _merge==2
drop _merge
rename rfctransta rfc
collapse (sum) valorendolares ntrans, by (rfc fraccionarancelaria cveddocument destination iddest street number_int number_ext cp mun firm transportista destfile small year month)
save "${temp}destination-info-pair-`mm'`type'complete2_rfc", replace
}

use "${temp}destination-info-pair-01`type'complete2_rfc", replace
save "${temp}destination-info-pair-`type'complete2_rfc", replace

foreach mm in "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"{
use "${temp}destination-info-pair-`type'complete2_rfc", replace
append using "${temp}destination-info-pair-`mm'`type'complete2_rfc"
save, replace
}
}

foreach type in "2004" {
foreach mm in "06" "07" "08" "09" "10" "11" "12"{
use "${temp}destination-info-pair-`mm'`type'complete_multi2", replace
keep if cvepaisorigendestino=="USA"
keep if cvepaiscomp=="USA"
drop if multi==1
drop multi
gen frfc=substr(rfc, 4, 1)
destring frfc, replace force
gen firm=(frfc!=.)
drop frfc
sort rfc
rename rfc rfctransta 
merge m:1 rfctransta using "${output_data}transportista_rfclist.dta"
gen transportista=0 
replace transportista=1 if _merge==3
drop if _merge==2
drop _merge
rename rfctransta rfc
collapse (sum) valorendolares ntrans, by (rfc fraccionarancelaria cveddocument  destination iddest street number_int number_ext cp mun firm destfile transportista small year month)
save "${temp}destination-info-pair-`mm'`type'complete2_rfc", replace
}

use "${temp}destination-info-pair-06`type'complete2_rfc", replace
save "${temp}destination-info-pair-`type'complete2_rfc", replace

foreach mm in  "07" "08" "09" "10" "11" "12"{
use "${temp}destination-info-pair-`type'complete2_rfc", replace
append using "${temp}destination-info-pair-`mm'`type'complete2_rfc"
save, replace
}
}



local i=`1'
while `i'<=`2'{
foreach type in  "2004" "2005" "2006" "2007"  "2008"  "2009" "2010" "2011"{
use "${temp}destination-info-pair-`type'complete2_rfc", replace
gen definite=.
replace definite=1 if cveddocument=="A1" 
replace definite=2 if cveddocument=="RT" 
replace definite=1 if cveddocument=="A1"
replace definite=1 if cveddocument=="G1"
replace definite=1 if cveddocument=="G6"
replace definite=1 if cveddocument=="G7"
replace definite=1 if cveddocument=="L1"
replace definite=1 if cveddocument=="T1"
replace definite=1 if cveddocument=="F8"
replace definite=1 if cveddocument=="F9"

replace definite=2 if cveddocument=="I1"
replace definite=2 if cveddocument=="J1"
replace definite=2 if cveddocument=="J2"
replace definite=2 if cveddocument=="RT"

replace definite=4 if cveddocument=="BA"
replace definite=4 if cveddocument=="BE"
replace definite=4 if cveddocument=="BM"
replace definite=4 if cveddocument=="BO"
replace definite=4 if cveddocument=="J3"
replace definite=4 if cveddocument=="BP"
replace definite=4 if cveddocument=="BR"
replace definite=4 if cveddocument=="J4"
gen maquila=0
replace maquila=1 if definite==2 | definite==4
gen dzero=(frac<10000000)
tostring frac, replace
replace frac="0"+frac if dzero==1
gen hs2=substr(frac, 1, 2)
destring hs2, replace
keep if hs2==`i'
collapse (sum) valorendolares ntrans, by (rfc frac maquila destination iddest street number_ext number_int cp mun firm transportista destfile year month small)
keep rfc maquila valorendolares frac ntrans destination iddest street number_ext number_int cp mun firm destfile transportista small year month
save "${temp}destination_us_`type'detail_hs`i'_rfcfrac.dta", replace
}
local i=`i'+1
}


local i=`1'
while `i'<=`2'{
use "${temp}destination_us_2004detail_hs`i'_rfcfrac.dta", replace
save "${temp}custom_dest_registry2004to2011_hs`i'_byyear_rfcfrac", replace

foreach type in "2005" "2006" "2007"  "2008" "2009" "2010" "2011"{
use "${temp}custom_dest_registry2004to2011_hs`i'_byyear_rfcfrac", replace
append using "${temp}destination_us_`type'detail_hs`i'_rfcfrac.dta"
save, replace
}

use "${temp}custom_dest_registry2004to2011_hs`i'_byyear_rfcfrac", replace
collapse (sum) valorendolares ntrans (max) firm (min) transportista, by (rfc maquila frac year month destination iddest street number_ext number_int small cp mun)
sort destination iddest cp street number_ext number_int mun
gen hs=`i'
gen id1=_n
egen id2=group(destination)
save "${temp}custom_dest_registry2004to2011_hs`i'_rfcfrac", replace
local i=`i'+1
}
   }
end



/* Second part cleans slash "/" "c/o" and "y/o, creating several different names.*/


capture program drop cleanNames
program define cleanNames
   {
     use `1', replace
     display "cleanNames"`3'
	 gen `2'=`3'

     replace `2' = upper(`2')
	replace `2' = trim(`2')
	replace `2'="" if length(`2')==1
replace `2'=subinstr(`2', ".", "",.)
replace `2'=subinstr(`2', ",", " ",.)
replace `2'=subinstr(`2', "  "," ",.)
replace `2'=subinstr(`2', "`"," ",.)
replace `2'=subinstr(`2', "(", "",.)
replace `2'=subinstr(`2', ")", "",.)
     replace `2' = subinstr(`2',"-","",.) 
     replace `2' = subinstr(`2',"'","",.)
	 replace `2' = subinstr(`2',"//","/",.) 
     replace `2' = subinstr(`2',"/"," / ",.)
     replace `2' = subinstr(`2',"&"," & ",.)
	 replace `2' = subinword(`2',"CORP"," ",.)
	 replace `2' = subinword(`2',"CO"," ",.)
	 replace `2' = subinword(`2',"INC"," ",.)
	 replace `2' = subinword(`2',"LTD"," ",.)
	 replace `2' = subinword(`2',"COMPANY"," ",.)
	 replace `2' = subinword(`2',"INC."," ",.)
	 replace `2' = subinword(`2',"LTD."," ",.)
	 replace `2' = subinword(`2',"LLC."," ",.)
	 replace `2' = subinword(`2',"LLC"," ",.)
	 replace `2' = subinword(`2',"LP"," ",.)
	 replace `2' = subinword(`2',"CORP."," ",.)
	 replace `2' = subinword(`2',"CO."," ",.)
	 replace `2' = subinword(`2',"COINC"," ",.)
	 replace `2' = subinword(`2',"COLLC"," ",.)
	 gen incend=(substr(word(`2', -1), -3, .)=="INC")
	 replace  `2' = reverse(`2') if incend==1
	 replace  `2' = subinstr( `2',"CNI"," ",1) if incend==1 & length(word(`2', 1))>4
     replace  `2' = reverse(`2') if incend==1
	 drop incend
	 replace `2' = subinword(`2',"CORPORATION"," ",.)
	 replace `2' = subinword(`2',"&","AND",.)
	 	 replace `2' = subinword(`2',"AND"," ",.)
	 replace `2' = subinword(`2',"CORPORATION."," ",.)
	 replace `2' = subinword(`2',"INCORPORATED"," ",.)
     replace `2' = subinword(`2',"INCORPORATION"," ",.)
     replace `2' = subinword(`2',"INCORPORATE"," ",.)
     replace `2' = subinword(`2',"INCORP"," ",.)
	 replace `2' = subinword(`2',"INTERNATIONALINC","INTERNATIONAL",.)
	 replace `2' = subinword(`2',"BROS","BROTHERS",.)
	 replace `2' = subinword(`2',"PROD","PRODUCTS",.)
	 	 replace `2' = subinword(`2',"PRODS","PRODUCTS",.)
		 	 replace `2' = subinword(`2',"DIST","DISTRIBUTORS",.)
	 replace `2' = subinword(`2',"USAINC","USA",.)
	 replace `2' = subinword(`2',"THE"," ",.)
	 replace `2' = subinword(`2',"HOLDING"," ",.)
	 replace `2' = subinword(`2',"HOLDINGS"," ",.)
	 replace `2' = subinword(`2',"COMPANIES"," ",.)
	 replace `2' = subinword(`2',"LIMITED"," ",.)	
	 	 replace `2' = subinword(`2',"HLDGS"," ",.)	
	 replace `2' = subinword(`2',"INTL","INTERNATIONAL",.)
		 		 replace `2' = subinword(`2',"C / O","C/O",.)
				 replace `2' = subinword(`2',"Y / O","Y/O",.)
				 replace `2' = subinword(`2',"UNITED STATES","US",.)
replace `2'=subinword(`2', "1","One",.)
replace `2'=subinword(`2', "2","TWO",.)
replace `2'=subinword(`2', "3","THREE",.)
replace `2'=subinword(`2', "4","FOUR",.)
replace `2'=subinword(`2', "5","FIVE",.)
replace `2'=subinword(`2', "6","SIX",.)
replace `2'=subinword(`2', "7","SEVEN",.)
replace `2'=subinword(`2', "8","EIGHT",.)
replace `2'=subinword(`2', "9","NINE",.)
replace `2'=subinword(`2', "10","TEN",.)
replace `2'=subinword(`2', "UNO","ONE",.)
replace `2'=subinword(`2', "DOS","TWO",.)
replace `2'=subinword(`2', "TRES","THREE",.)
replace `2'=subinword(`2', "CUATRO","FOUR",.)
replace `2'=subinword(`2', "CINCO","FIVE",.)
replace `2'=subinword(`2', "SEIS","SIX",.)
replace `2'=subinword(`2', "SIETE","SEVEN",.)
replace `2'=subinword(`2', "OCHO","EIGHT",.)
replace `2'=subinword(`2', "NUEVE","NINE",.)
replace `2'=subinword(`2', "DIEZ","TEN",.)
replace `2'=subinstr(`2', "11","ELEVEN",.)
replace `2'=subinstr(`2', "12","TWELVE",.)
replace `2'=subinstr(`2', "13","THIRTEEN",.)
replace `2'=subinstr(`2', "14","FOURTEEN",.)
replace `2'=subinstr(`2', "15","FIFTEEN",.)
replace `2'=subinstr(`2', "16","SIXTEEN",.)
replace `2'=subinstr(`2', "17","SEVETEEN",.)
replace `2'=subinstr(`2', "18","EIGHTEEN",.)
replace `2'=subinstr(`2', "19","NINETEEN",.)
replace `2'=subinstr(`2', "20","TWENTY",.)
replace `2'=subinstr(`2', "21","TWENTY ONE",.)
replace `2'=subinstr(`2', "22","TWENTY TWO",.)
replace `2'=subinstr(`2', "23","TWENTY THREE",.)
replace `2'=subinstr(`2', "24","TWENTY FOUR",.)
replace `2'=subinstr(`2', "25","TWENTY FIVE",.)
replace `2'=subinstr(`2', "26","TWENTY SIX",.)
replace `2'=subinstr(`2', "27","TWENTY SEVEN",.)
replace `2'=subinstr(`2', "28","TWENTY EIGHT",.)
replace `2'=subinstr(`2', "29","TWENTY NINE",.)
replace `2'=subinstr(`2', "30","THIRTY",.)
replace `2'=subinstr(`2', "31","THIRTY ONE",.)
replace `2'=subinstr(`2', "32","THIRTY TWO",.)
replace `2'=subinstr(`2', "33","THIRTY THREE",.)
replace `2'=subinstr(`2', "34","THIRTY FOUR",.)
replace `2'=subinstr(`2', "35","THIRTY FIVE",.)
replace `2'=subinstr(`2', "36","THIRTY SIX",.)
replace `2'=subinstr(`2', "37","THIRTY SEVEN",.)
replace `2'=subinstr(`2', "38","THIRTY EIGHT",.)
replace `2'=subinstr(`2', "39","THIRTY NINE",.)
replace `2'=subinstr(`2', "40","FOURTY",.)
replace `2'=subinstr(`2', "41","FOURTY ONE",.)
replace `2'=subinstr(`2', "42","FOURTY TWO",.)
replace `2'=subinstr(`2', "43","FOURTY THREE",.)
replace `2'=subinstr(`2', "44","FOURTY FOUR",.)
replace `2'=subinstr(`2', "45","FOURTY FIVE",.)
replace `2'=subinstr(`2', "46","FOURTY SIX",.)
replace `2'=subinstr(`2', "47","FOURTY SEVEN",.)
replace `2'=subinstr(`2', "48","FOURTY EIGHT",.)
replace `2'=subinstr(`2', "49","FOURTY NINE",.)
replace `2'=subinstr(`2', "50","FIFTY",.)
replace `2'=subinstr(`2', "51","FIFTY ONE",.)
replace `2'=subinstr(`2', "52","FIFTY TWO",.)
replace `2'=subinstr(`2', "53","FIFTY THREE",.)
replace `2'=subinstr(`2', "54","FIFTY FOUR",.)
replace `2'=subinstr(`2', "55","FIFTY FIVE",.)
replace `2'=subinstr(`2', "56","FIFTY SIX",.)
replace `2'=subinstr(`2', "57","FIFTY SEVEN",.)
replace `2'=subinstr(`2', "58","FIFTY EIGHT",.)
replace `2'=subinstr(`2', "59","FIFTY NINE",.)
replace `2'=subinstr(`2', "60","SIXTY",.)
replace `2'=subinstr(`2', "61","SIXTY ONE",.)
replace `2'=subinstr(`2', "62","SIXTY TWO",.)
replace `2'=subinstr(`2', "63","SIXTY THREE",.)
replace `2'=subinstr(`2', "64","SIXTY FOUR",.)
replace `2'=subinstr(`2', "65","SIXTY FIVE",.)
replace `2'=subinstr(`2', "66","SIXTY SIX",.)
replace `2'=subinstr(`2', "67","SIXTY SEVEN",.)
replace `2'=subinstr(`2', "68","SIXTY EIGHT",.)
replace `2'=subinstr(`2', "69","SIXTY NINE",.)
replace `2'=subinstr(`2', "70","SEVENTY",.)
replace `2'=subinstr(`2', "71","SEVENTY ONE",.)
replace `2'=subinstr(`2', "72","SEVENTY TWO",.)
replace `2'=subinstr(`2', "73","SEVENTY THREE",.)
replace `2'=subinstr(`2', "74","SEVENTY FOUR",.)
replace `2'=subinstr(`2', "75","SEVENTY FIVE",.)
replace `2'=subinstr(`2', "76","SEVENTY SIX",.)
replace `2'=subinstr(`2', "77","SEVENTY SEVEN",.)
replace `2'=subinstr(`2', "78","SEVENTY EIGHT",.)
replace `2'=subinstr(`2', "79","SEVENTY NINE",.)
replace `2'=subinstr(`2', "80","EIGHTY",.)
replace `2'=subinstr(`2', "81","EIGHTY ONE",.)
replace `2'=subinstr(`2', "82","EIGHTY TWO",.)
replace `2'=subinstr(`2', "83","EIGHTY THREE",.)
replace `2'=subinstr(`2', "84","EIGHTY FOUR",.)
replace `2'=subinstr(`2', "85","EIGHTY FIVE",.)
replace `2'=subinstr(`2', "86","EIGHTY SIX",.)
replace `2'=subinstr(`2', "87","EIGHTY SEVEN",.)
replace `2'=subinstr(`2', "88","EIGHTY EIGHT",.)
replace `2'=subinstr(`2', "89","EIGHTY NINE",.)
replace `2'=subinstr(`2', "90","NINETY",.)
replace `2'=subinstr(`2', "91","NINETY ONE",.)
replace `2'=subinstr(`2', "92","NINETY TWO",.)
replace `2'=subinstr(`2', "93","NINETY THREE",.)
replace `2'=subinstr(`2', "94","NINETY FOUR",.)
replace `2'=subinstr(`2', "95","NINETY FIVE",.)
replace `2'=subinstr(`2', "96","NINETY SIX",.)
replace `2'=subinstr(`2', "97","NINETY SEVEN",.)
replace `2'=subinstr(`2', "98","NINETY EIGHT",.)
replace `2'=subinstr(`2', "99","NINETY NINE",.)



drop if index(destination2, "SA DE CV")>0
drop if index(destination2, "S A DE C V")>0
drop if index(destination2, "SDE RL DE CV")>0
drop if index(destination2, "S DE R L DE CV")>0
drop if index(destination2, "S DE RL DE CV")>0
drop if index(destination2, "SDE RLDE CV")>0
drop if index(destination2, "S DE R L DE C V")>0
drop if index(destination2, "SADE CV")>0
drop if index(destination2, "SA / CV")>0
drop if index(destination2, "SACV")>0
drop if index(destination2, "SADE CV")>0
drop if index(destination2, "S A DE CV")>0
drop if index(destination2, "S DE RLDE CV")>0
drop if index(destination2, "S A DE CV")>0
drop if index(destination2, "S DE RL")>0
drop if index(destination2, "SDE RL")>0
drop if index(destination2, "S DE RLDE CV")>0
drop if index(destination2, "DE C V")>0
drop if index(destination2, "S DE  RL DE  CV")>0
drop if index(destination2, "S DE  RL DE CV")>0
drop if index(destination2, "S/RL/CV")>0
drop if index(destination2, "DECV")>0
drop if index(destination2, "SOCIEDAD COOPERATIVA")>0
drop if index(destination2, "S DE PR DE RL")>0
drop if index(destination2, "SPR DE RL")>0
drop if index(destination2, "S RL")>0
drop if index(destination2, "SPR RL")>0
drop if index(destination2, "S PR RL")>0
drop if index(destination2, "S P R  R L")>0
drop if index(destination2, "SRL")>0
drop if index(destination2, "S  P R  R L")>0
drop if index(destination2, "SPR")>0
drop if index(destination2, "SC  RL")>0
drop if index(destination2, "SCL")>0

	replace `2' = trim(`2')
	gen wordnumber=wordcount(destination2)
egen maxwordnumber=max(wordnumber)
gen destination3=""
gen nslash=0
	local j=1
while `j'<=maxwordnumber{
gen dest`j'=word(destination2,`j')
gen slash`j'=(dest`j'=="/"|dest`j'=="C/O"|dest`j'=="Y/O" |dest`j'=="DBA" )
replace destination3=destination3+" "+dest`j'
replace nslash=nslash+slash`j'
drop dest`j'
drop slash`j'
local j=`j'+1
}
replace `2'=destination3
drop destination3
replace `2' = trim(`2')
egen id3=group(`2')
save `5', replace
collapse (sum) valorendolares, by (`2')
capture sort `2'
     save `4', replace
   }
end




/* Clean the destination name*/

capture program drop cleanSlash
program define cleanSlash
   {
local i=`1'
while `i'<=`2'{

cleanNames "${temp}custom_dest_registry2004to2011_hs`i'_rfcfrac destination2" destination temp temp2

use temp2, replace
egen maxnslash=max(nslash)
gen destnametemp1=destination2
drop maxwordnumber
gen slash=0

local k=1
while `k'<= maxnslash{
local l=`k'+1
capture gen destnametemp`l'=destnametemp`k' 
capture gen destnameclean`k'=destnametemp`k' if nslash<`k'
capture replace destnameclean`k'=destnametemp`k' if nslash<`k'
capture replace destnametemp`k'="" if nslash<`k'

replace wordnumber=wordcount(destnametemp`k')
egen maxwordnumber=max(wordnumber)
capture gen destnameclean`l'=""

replace slash=0
local j=0
while `j'<=maxwordnumber-1{
local p=maxwordnumber-`j'
replace slash=`p' if word(destnametemp`k',`p')=="C/O"
replace slash=`p' if word(destnametemp`k',`p')=="/"
replace slash=`p' if word(destnametemp`k',`p')=="Y/O"
replace slash=`p' if word(destnametemp`k',`p')=="DBA"
local j=`j'+1
}

local j=1
while `j'<=maxwordnumber{
gen dest`j'_bs=word(destnametemp`k',`j') if `j'<slash
gen dest`j'_as=word(destnametemp`k',`j') if `j'>slash
local j=`j'+1
}

replace destnametemp`k'=""
replace destnametemp`l'=""

local j=1
while `j'<=maxwordnumber{
replace destnametemp`k'=destnametemp`k'+" "+dest`j'_bs if nslash>=`k'
drop dest`j'_bs
replace destnametemp`l'=destnametemp`l'+" "+dest`j'_as if nslash>=`k'
drop dest`j'_as
local j=`j'+1
}

replace destnametemp`k'=trim(destnametemp`k')
replace destnametemp`l'=trim(destnametemp`l')
replace destnameclean`k'=destnametemp`k' if  destnametemp`k'!="" & nslash>=`k'
replace destnameclean`l'=destnametemp`l' if  destnametemp`k'!="" & nslash>=`k'
drop maxwordnumber
local k=`k'+1
}

drop destnametemp*** maxnslash slash wordnumber
save "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_rfcfrac", replace

egen maxnslash=max(nslash)
gen maxnsep=maxnslash+1

local j=2
while `j'<=maxnsep{
capture keep if destnameclean`j'!=""
capture drop if length(destnameclean`j')==1
capture drop if `j'=="USA"
capture replace destnameclean1=destnameclean`j'
capture "${temp}save custom_dest_registry2004to2011_hs`i'_slashcleaned_temp`j'_rfcfrac", replace
local j=`j'+1
}

use "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_rfcfrac", replace


egen maxnslash=max(nslash)
gen maxnsep2=maxnslash+1

save, replace

local j=2
while `j'<=maxnsep2{
capture use "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_rfcfrac", replace
capture append using "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_temp`j'_rfcfrac"
capture save, replace
capture erase "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_temp`j'_rfcfrac.dta"
local j=`j'+1
}

capture drop destnameclean2-maxnsep
capture gen destnameclean1=destination2
sort destnameclean1
egen id4=group(destnameclean1)
save, replace
local i=`i'+1
}
   }
end



/* Clean zipcode, municipality, and iddest (ein) */

capture program drop loopHSCleanIddest_1
program define loopHSCleanIddest_1
   {
local i=`1'
while `i'<=`2'{
use "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_rfcfrac", replace

/*** cleaning zipcode ***/
gen cp2=cp
replace cp2=subinstr(cp2, "-", "",.)
replace cp2=substr(cp2, 1, 5)
destring cp2, replace force
replace cp2=. if cp2==0
replace cp2=. if cp2==1
rename cp2 zip
sort zip

merge m:1 zip using "${rawdata}usziplist_zp4.dta"

drop if _merge==2
gen nouszip=(zip!=. & _merge==1)
drop _merge

rename zip cp2

gen cp_9=cp
replace cp_9="" if length(cp_9)!=10

gen zip_complete=cp
replace zip_complete=subinstr(zip_complete, "-", "",.)
replace zip_complete=substr(zip_complete, 1, 5)
replace zip_complete=cp_9 if cp_9!=""
drop cp_9


/*** cleaning street number ***/
gen number_ext2=number_ext
replace number_ext2="" if number_ext2=="0"
replace number_ext2="" if number_ext2=="."
replace number_ext2="" if number_ext2=="-"
replace number_ext2="" if number_ext2=="SN"
replace number_ext2="" if number_ext2=="S N"
replace number_ext2="" if number_ext2=="S/N"
replace number_ext2="" if number_ext2=="SIN NUMERO"
replace number_ext2=trim(number_ext2)

gen number_int2=number_int
replace number_int2="" if number_int2=="0"
replace number_int2="" if number_int2=="."
replace number_int2="" if number_int2=="-"
replace number_int2="" if number_int2=="SN"
replace number_int2="" if number_int2=="S N"
replace number_int2="" if number_int2=="S/N"
replace number_int2=""  if number_int2=="SIN NUMERO"
replace number_int2="" if number_int2==number_ext2
replace number_int2=trim(number_int2)

/*** cleaning iddest(ein) 
creating new id called iddest2 ***/

gen iddest2=iddest
replace iddest2=subinstr(iddest2, " ", "",.)
replace iddest2=subinstr(iddest2, "-", "",.)
replace iddest2=trim(iddest2)

gen length=length(iddest2)
replace iddest2=substr(iddest2, 1, 9) if length==11
replace iddest2="" if length!=9
destring iddest2, gen(iddest3) force	
replace iddest2="" if iddest3==.
replace iddest2="" if iddest3==0 
replace iddest2="" if iddest3==123456789
replace iddest2="" if iddest3==111111111
replace iddest2="" if iddest3==222222222
replace iddest2="" if iddest3==333333333
replace iddest2="" if iddest3==444444444
replace iddest2="" if iddest3==555555555
replace iddest2="" if iddest3==666666666
replace iddest2="" if iddest3==777777777
replace iddest2="" if iddest3==888888888
replace iddest2="" if iddest3==999999999

/*** checking if ein has correct first two digit***/		

foreach name in "07" "08" "09" "17" "18" "19" "28" "29" "49" "69" "70" "78" "79" "89" "96" "97"{
replace iddest2="" if substr(iddest2, 1, 2)=="`name'"
        }
		
/*** checking if zipcode is written wrongly***/		

gen iddest2_5=substr(iddest2, 1, 5)
destring iddest2_5, replace
replace iddest2="" if iddest2_5==cp2


/*** cleaning municipality (taking out state component) ***/

replace mun=upper(mun)
replace mun=subinstr(mun, ",", "",.)
replace mun=subinstr(mun, ".", "",.)
replace mun=subinstr(mun, ";", "",.)
replace mun=subinword(mun, "NUEVA", "NEW",.)
replace mun=trim(mun)
gen lastwmun=word(mun, -1)

foreach name in "AK" "AL" "AR" "AZ" "CA" "CAL" "CO" "CT" "DC" "DE" "FL" "GA" "HI" "IA" "ID" "IL" "IN" "KS" "KY" "LA" "MA"  "MD" "ME" "MI" "MN" "MO" "MS" "MT" "NC" "ND" "NE" "NH" "NJ" "NM" "NV" "NY" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT" "VA" "VT" "WA" "WI" "WV" "WY"  "USA" "EUA"{
replace mun=subinword(mun, "`name'","",.)
replace mun=trim(mun)
        }

		gen state_munname=""
		foreach name in "AK" "AL" "AR" "AZ" "CA" "CO" "CT" "DC" "DE" "FL" "GA" "HI" "IA" "ID" "IL" "IN" "KS" "KY" "LA" "MA"  "MD" "ME" "MI" "MN" "MO" "MS" "MT" "NC" "ND" "NE" "NH" "NJ" "NM" "NV" "NY" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT" "VA" "VT" "WA" "WI" "WV" "WY" {
replace state_munname=lastwmun if lastwmun=="`name'"
replace state_munname=trim(state_munname)
        }
		
		foreach name in "ALASKA" "ALABAMA" "ARKANSAS" "ARIZONA" "CALIFORNIA" "COLORADO" "CONNETICUT" "DC" "DELAWARE" "FLORIDA" "GEORGIA" "HAWAII" "IOWA" "IDAHO" "ILLINOIS" "INDIANA" "KANSAS" "KENTUCKY" "LOUISIANA" "MASSACHUSETTS" "MASSACHUSETS" "MARYLAND" "MAINE" "MICHIGAN" "MINNESOTA" "MISSOURI" "MISSISSIPPI" "MONTANA" "NOTH CAROLINA" "NORTH DAKOTA" "NEBRASKA" "NEW HAMPSHIRE" "NEW JERSEY" "NEW MEXICO" "NEVADA" "NEW YORK" "OHIO" "OKLAHOMA" "OREGON" "PENNSYLVANIA" "RHODE ISLAND" "SOUTH CAROLINA" "SOUTH DAKOTA" "TENNESSEE" "TEXAS" "UTAH" "VIRGINIA" "VERMONT" "WASHINGTON" "WISCONSIN" "WEST VIRGINIA" "WYOMING" "ESTADOS UNIDOS  AMERICA"{
replace mun=subinword(mun, "`name'","",.) if subinword(mun, "`name'","",.)!=""
replace mun=trim(mun)
        }

		foreach name in "ALASKA" "ALABAMA" "ARKANSAS" "ARIZONA" "CALIFORNIA" "COLORADO" "CONNETICUT" "DC" "DELAWARE" "FLORIDA" "GEORGIA" "HAWAII" "IOWA" "IDAHO" "ILLINOIS" "INDIANA" "KANSAS" "KENTUCKY" "LOUISIANA" "MASSACHUSETTS" "MARYLAND" "MAINE" "MICHIGAN" "MINNESOTA" "MISSOURI" "MISSISSIPPI" "MONTANA" "NOTH CAROLINA" "NORTH DAKOTA" "NEBRASKA" "NEW HAMPSHIRE" "NEW JERSEY" "NEW MEXICO" "NEVADA" "NEW YORK" "OHIO" "OKLAHOMA" "OREGON" "PENNSYLVANIA" "RHODE ISLAND" "SOUTH CAROLINA" "SOUTH DAKOTA" "TENNESSEE" "TEXAS" "UTAH" "VIRGINIA" "VERMONT" "WASHINGTON" "WISCONSIN" "WEST VIRGINIA" "WYOMING" {
replace state_munname=lastwmun if lastwmun=="`name'"
replace state_munname=trim(state_munname)
        }

		
gen wmunnum=wordcount(mun)
egen maxwordnumber=max(wmunnum)
		
local j=1
while `j'<=maxwordnumber{
gen mun`j'=word(mun,`j')
replace mun`j'=subinstr(mun`j', "USA", ".",.) if `j'==wmunnum
replace mun`j'=subinstr(mun`j', "TX", ".",.) if `j'==wmunnum
replace mun`j'=subinstr(mun`j', "TEXAS", ".",.) if `j'==wmunnum
replace mun`j'=subinstr(mun`j', "TEX", ".",.) if `j'==wmunnum
destring mun`j', force generate(aaa`j')
replace mun`j'="" if aaa`j'!=.
drop aaa`j'
local j=`j'+1
}
		

replace mun=mun1

local j=2
while `j'<=maxwordnumber{
replace mun=mun+""+mun`j'
local j=`j'+1
}

replace mun=trim(mun)
replace mun=subinstr(mun, ".", "",.)

		
rename cp2 Zipcode
sort Zipcode
merge m:1 Zipcode using "${rawdata}uszipcode_county_wide.dta"
drop if _merge==2
rename state state_zip
rename county1 conty_zip1
rename county2 conty_zip2
rename county3 conty_zip3
rename Zipcode cp2
drop _merge

sort mun
merge m:1 mun using "${rawdata}uszipmun_state_wide.dta"
drop if _merge==2
drop _merge
save "${temp}custom_dest_registry2004to2011_cleaned_hs`i'_rfcfracmaquila", replace

local i=`i'+1
}

   }
end

/*** deleting transactions that are by individuals and by transportation companies***/

capture program drop prepareForZP4_1
program define prepareForZP4_1
   {
local i=`1'
while `i'<=`2'{

use "${temp}custom_dest_registry2004to2011_cleaned_hs`i'_rfcfracmaquila.dta", clear

keep if destnameclean1!=""
keep if firm==1
keep if transportista==0

replace mun=trim(mun)

gen cleanstreet=street
replace cleanstreet=subinstr(cleanstreet, ".", " ",.)
replace cleanstreet=subinstr(cleanstreet, ",", " ",.)
replace cleanstreet=trim(cleanstreet)
replace cleanstreet=subinword(cleanstreet, "P O", "PO", .)

gen firstst=word(cleanstreet, 1)
replace firstst=upper(firstst)
replace firstst=subinstr(firstst, "-", "",.)
replace firstst=trim(firstst)
gen aaa=(firstst==number_ext2)
gen bbb=(firstst==number_int2)
gen po=(index( cleanstreet, "PO BOX")>0 | index( cleanstreet, "PO  BOX")>0 )


gen lastst=word(cleanstreet, -1)
replace lastst=subinstr(lastst, "-", "",.)
replace lastst=trim(lastst)
gen ccc=(lastst==number_ext2)
gen ddd=(lastst==number_int2)


gen streetext=street 
replace streetext=number_ext2+" "+street if aaa==0 & ccc==0 & po==0
replace streetext=streetext+" "+number_int2 if bbb==0 & ddd==0 & po==0
 

replace streetext=subinstr(streetext, ".", " ",.)
replace streetext=subinstr(streetext, ",", " ",.)
replace streetext=trim(streetext)

replace zip_complete="" if zip_complete=="."
replace zip_complete="" if zip_complete=="0"

replace state_munname="TX" if state_munname=="TEXAS"
replace state_zip=state_munname if state_zip==""

collapse (sum) ntrans valorendolares, by (rfc maquila frac year month destnameclean1 destination2 iddest2 streetext mun cp2 small)
gen id5=_n
gen addressid=_n
sort addressid
gen hs2=`i'
save "${temp}address_beforezp4all_hs`i'_rfcfracmaquila", replace
local i=`i'+1
}
   }
end

getDestinationHS 50 63
cleanSlash 50 63
loopHSCleanIddest_1 50 63
prepareForZP4_1 50 63


/*************************************************************************
**********************************(5)************************************* 
Merging Back the unified US importer ID
***************************************************************************/


forvalues i=50/63{

use ${temp}address_beforezp4all_hs`i'_rfcfracmaquila.dta

rename streetext addressoriginal
merge m:1 addressoriginal cp2 mun using ${output_data}address_beforeafterzp4.dta
keep if hs2==`i'
replace addressfinal=streetext if addressfinal==""
replace cityfinal=mun if cityfinal==""

rename destination2 name
rename addressfinal address
rename cityfinal city
rename zipfinal zip

replace address=upper(address)
replace city=upper(city)

sort name address city zip rfc frac addressoriginal cp2 mun month year valorendolares maquila
by name address city zip rfc frac addressoriginal cp2 mun month year valorendolares maquila: gen a=_n 
keep if a==1

collapse (sum)valorendolares, by(name address city zip rfc frac month year maquila)
egen addressidzp4=group(address city zip)

save ${temp}adress_afterzp4_hs`i'_fracmaquila, replace
}


forvalues i=50/63{
use ${rawdata}matched_hs`i'_common.dta, replace
drop _merge
rename f_clid_section newclid2
sort name address city zip newclid2
by name address city zip newclid2: gen a=_n
keep if a==1
by name address city zip: gen b=_N
gen indicator=1 if b>1
egen id_repeat=min(newclid2), by (name address city zip)
replace newclid2=id_repeat if indicator==1
sort name address city zip newclid2
by name address city zip newclid2: gen c=_n
keep if c==1
drop a  b c vol_dollars n_trans earliest latest
merge 1:m name address city zip using ${temp}adress_afterzp4_hs`i'_fracmaquila
drop if _merge==2
save ${temp}rfc_id_merge_hs`i'_fracmaquila, replace
}

**** Generate importer-exporter values by  year at the product level.

use ${temp}rfc_id_merge_hs50_fracmaquila, replace
forvalues i=51/63{
append using "${temp}rfc_id_merge_hs`i'_fracmaquila"
}

drop if month<6
rename valorendolares value_exporter_importer

drop _merge
merge m:1 newclid2 using ${output_data}naics2007_5063_newclid2.dta
drop _merge

collapse (sum) value_exporter_importer, by(rfc year hs2 newclid2 frac naics2007 maquila)

save ${output_data}exporter_importer_50_63_productlevel_new_junedecember_maquila, replace


***Generate exports from 501_551_505 by firm-product-maquila exports by month-year

foreach type in "06" "07" "08" "09" "10" "11" "12"{
use ${temp}501_551_`type'2004use, replace

gen definite=.
replace definite=1 if cveddocument=="A1" 
replace definite=2 if cveddocument=="RT" 
replace definite=1 if cveddocument=="A1"
replace definite=1 if cveddocument=="G1"
replace definite=1 if cveddocument=="G6"
replace definite=1 if cveddocument=="G7"
replace definite=1 if cveddocument=="L1"
replace definite=1 if cveddocument=="T1"
replace definite=1 if cveddocument=="F8"
replace definite=1 if cveddocument=="F9"

replace definite=2 if cveddocument=="I1"
replace definite=2 if cveddocument=="J1"
replace definite=2 if cveddocument=="J2"
replace definite=2 if cveddocument=="RT"

replace definite=4 if cveddocument=="BA"
replace definite=4 if cveddocument=="BE"
replace definite=4 if cveddocument=="BM"
replace definite=4 if cveddocument=="BO"
replace definite=4 if cveddocument=="J3"
replace definite=4 if cveddocument=="BP"
replace definite=4 if cveddocument=="BR"
replace definite=4 if cveddocument=="J4"

gen maquila=0
replace maquila=1 if definite==2 | definite==4


gen year=2004
keep if cvepaisorigendestino=="USA"&cvepaiscomprador=="USA"

destring fraccionarancelaria, replace force
gen ind=1 if fraccionarancelaria<10000000
replace ind=. if fraccionarancelaria<1000000
replace ind=1 if fraccionarancelaria<100000
tostring fraccionarancelaria, replace
gen hs2=substr(fraccionarancelaria, 1, 2)
replace hs2=substr(fraccionarancelaria, 1, 1) if ind==1

collapse (sum) valorendolares, by (rfc hs2 frac year pataduanal numpmento cvesecadua maquila)

gen combination=0
merge m:1 pataduanal numpmento cvesecadua using ${temp}505-`type'2004exp_single
replace combination=1 if _merge==3
drop _merge
destring hs2, replace force
keep if hs2>49&hs2<64
gen value_combination=valorendolares if combination==1
save ${temp}combination_frac_50_63_`type'2004, replace
}


forvalues j=5/9{
foreach type in "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"{
use ${temp}501_551_`type'200`j'use, replace

gen year=200`j'
keep if cvepaisorigendestino=="USA"&cvepaiscomprador=="USA"

destring fraccionarancelaria, replace force
gen ind=1 if fraccionarancelaria<10000000
replace ind=. if fraccionarancelaria<1000000
replace ind=1 if fraccionarancelaria<100000
tostring fraccionarancelaria, replace
gen hs2=substr(fraccionarancelaria, 1, 2)
replace hs2=substr(fraccionarancelaria, 1, 1) if ind==1

gen definite=.
replace definite=1 if cveddocument=="A1" 
replace definite=2 if cveddocument=="RT" 
replace definite=1 if cveddocument=="A1"
replace definite=1 if cveddocument=="G1"
replace definite=1 if cveddocument=="G6"
replace definite=1 if cveddocument=="G7"
replace definite=1 if cveddocument=="L1"
replace definite=1 if cveddocument=="T1"
replace definite=1 if cveddocument=="F8"
replace definite=1 if cveddocument=="F9"

replace definite=2 if cveddocument=="I1"
replace definite=2 if cveddocument=="J1"
replace definite=2 if cveddocument=="J2"
replace definite=2 if cveddocument=="RT"

replace definite=4 if cveddocument=="BA"
replace definite=4 if cveddocument=="BE"
replace definite=4 if cveddocument=="BM"
replace definite=4 if cveddocument=="BO"
replace definite=4 if cveddocument=="J3"
replace definite=4 if cveddocument=="BP"
replace definite=4 if cveddocument=="BR"
replace definite=4 if cveddocument=="J4"

gen maquila=0
replace maquila=1 if definite==2 | definite==4
collapse (sum) valorendolares, by (rfc hs2 frac year pataduanal numpmento cvesecadua maquila)
gen combination=0
merge m:1 pataduanal numpmento cvesecadua using ${temp}505-`type'200`j'exp_single
replace combination=1 if _merge==3
drop _merge
destring hs2, replace force
keep if hs2>49&hs2<64
gen value_combination=valorendolares if combination==1
save "${temp}combination_frac_50_63_`type'200`j'", replace
}
}

forvalues j=0/1{
foreach type in "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"{
use ${temp}501_551_`type'201`j'use, replace

gen year=201`j'
keep if cvepaisorigendestino=="USA"&cvepaiscomprador=="USA"

destring fraccionarancelaria, replace force
gen ind=1 if fraccionarancelaria<10000000
replace ind=. if fraccionarancelaria<1000000
replace ind=1 if fraccionarancelaria<100000
tostring fraccionarancelaria, replace
gen hs2=substr(fraccionarancelaria, 1, 2)
replace hs2=substr(fraccionarancelaria, 1, 1) if ind==1

gen definite=.
replace definite=1 if cveddocument=="A1" 
replace definite=2 if cveddocument=="RT" 
replace definite=1 if cveddocument=="A1"
replace definite=1 if cveddocument=="G1"
replace definite=1 if cveddocument=="G6"
replace definite=1 if cveddocument=="G7"
replace definite=1 if cveddocument=="L1"
replace definite=1 if cveddocument=="T1"
replace definite=1 if cveddocument=="F8"
replace definite=1 if cveddocument=="F9"

replace definite=2 if cveddocument=="I1"
replace definite=2 if cveddocument=="J1"
replace definite=2 if cveddocument=="J2"
replace definite=2 if cveddocument=="RT"

replace definite=4 if cveddocument=="BA"
replace definite=4 if cveddocument=="BE"
replace definite=4 if cveddocument=="BM"
replace definite=4 if cveddocument=="BO"
replace definite=4 if cveddocument=="J3"
replace definite=4 if cveddocument=="BP"
replace definite=4 if cveddocument=="BR"
replace definite=4 if cveddocument=="J4"

gen maquila=0
replace maquila=1 if definite==2 | definite==4
collapse (sum) valorendolares, by (rfc hs2 frac year pataduanal numpmento cvesecadua maquila)
gen combination=0
merge m:1 pataduanal numpmento cvesecadua using ${temp}505-`type'201`j'exp_single
replace combination=1 if _merge==3
drop _merge
destring hs2, replace force
keep if hs2>49&hs2<64
gen value_combination=valorendolares if combination==1
save ${temp}combination_frac_50_63_`type'201`j', replace
}
}


***dropping individuals and transport companies

use ${temp}combination_frac_50_63_062004, replace
foreach type in "07" "08" "09" "10" "11" "12"{
append using "${temp}combination_frac_50_63_`type'2004"
}
forvalues j=5/9{
foreach type in "06" "07" "08" "09" "10" "11" "12"{
append using "${temp}combination_frac_50_63_`type'200`j'"
}
}
forvalues j=0/1{
foreach type in "06" "07" "08" "09" "10" "11" "12"{
append using "${temp}combination_frac_50_63_`type'201`j'"
}
}

drop if rfc==""
replace rfc=subinstr(rfc," ","",.)
gen frfc=substr(rfc, 4, 1)
destring frfc, replace force
gen individual=1 if frfc==.
replace individual=1 if rfc=="EXTR920901TS4"
replace individual=1 if rfc=="AADC930401KX7"
replace individual=1 if rfc=="ESTU930401QZ7"
replace individual=1 if rfc=="ESTU930401QZ7"
replace individual=1 if rfc=="EJID930401SJ5"
replace individual=1 if rfc=="EMB930401KH4"
replace individual=1 if rfc=="OIN9304013N0"
replace individual=1 if rfc=="EDM930614781"
drop if individual==1

rename rfc rfctransta
sort rfctransta
merge m:1 rfctransta using "${output_data}transportista_rfclist.dta"
drop if _merge==3
drop _merge
rename rfctransta rfc
collapse (sum)value_combination valorendolares, by(frac hs2 rfc year maquila)
gen bbb=valorendolares*maquila
gen ccc=value_combination*maquila
bys rfc frac year: egen maquilavalue=sum(bbb)
bys rfc frac year: egen maquila_comb_value=sum(ccc)
duplicates drop
save ${output_data}501_551_505_frac_50_63_junedecember_maquila_withoutprice, replace


*** Merge importer information and keep exporters for which we know partners for more than 80% of its trade. 
use ${output_data}501_551_505_frac_50_63_junedecember_maquila_withoutprice, replace
destring hs2, replace force
merge 1:m rfc hs2 year frac maquila using ${output_data}exporter_importer_50_63_productlevel_new_junedecember_maquila
gen hs6=substr(frac, 1,6)
drop if _merge==2
gen both1=1 if _merge==3
replace both1=0 if both1==.
keep if both!=0
drop both1

bys rfc hs2 year frac maquila: gen exporter_maquila_count=(_n==1)
gen value_exporter_maquila_temp=valorendolares if exporter_maquila_count==1 
replace value_exporter_maquila_temp=0 if exporter_maquila_count==0 /*avoid double counting*/
egen value_exporter=total(value_exporter_maquila_temp), by(rfc hs2 frac year) /*summing maquila and non-maquila**/
bys rfc hs2 year frac: gen exporter_count=_n
replace exporter_count=0 if exporter_count!=1 
gen value_exporter_temp=value_exporter
replace value_exporter_temp=0 if exporter_count!=1
gen maquilavalue_temp=maquilavalue
replace maquilavalue_temp=0 if exporter_count!=1
collapse  (sum) value_exporter_temp maquilavalue_temp value_exporter_importer, by(rfc hs6 year newclid2 maquila)
bys rfc hs6 year: egen valorendolares=sum(value_exporter_temp)
bys rfc hs6 year: egen maquilavalue=sum(maquilavalue_temp)
bys rfc hs6 year: egen knownvalue=sum(value_exporter_importer)
gen knownratio=knownvalue/valorendolares
gen known=0
replace known=1 if knownratio>=0.8
save ${temp}matched_exp_imp_hs6_junedecember_maquila, replace

****Create restricted data without price with maquila
use ${temp}matched_exp_imp_hs6_junedecember_maquila, replace

***Drop observations if less than two exporters by hs6 product
bys year hs6 rfc: gen exporter=_n
replace exporter=0 if exporter>1
egen exporter_number=total(exporter), by(year hs6)
replace exporter_number=. if year!=2004
egen exporter_num=min(exporter_number), by( hs6)
drop if exporter_num<2

***Drop observations if less than two importers by hs6 product
bys year hs6 newclid2: gen importer=_n
replace importer=0 if importer>1
egen importer_number=total(importer), by(year hs6)
replace importer_number=. if year!=2004
egen importer_num=min(importer_number), by( hs6)
drop if importer_num<2

save ${temp}matched_exp_imp_hs6_junedecember_maquila_restrict, replace




capture program drop getDestinationHS
program define getDestinationHS
   {

/*** Assign information on whether a transaction is made by individual and by transportation company***/   



foreach type in  "2005" "2006" "2007"   "2008"   "2009" "2010" "2011"{
foreach mm in "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"{
use "${temp}destination-info-pair-`mm'`type'complete_multi2price", replace
drop if multi==1
drop multi
keep if cvepaisorigendestino=="USA"
keep if cvepaiscomp=="USA"
gen frfc=substr(rfc, 4, 1)
destring frfc, replace force
gen firm=(frfc!=.)
drop frfc
sort rfc
rename rfc rfctransta 
merge m:1 rfctransta using "{output_data}transportista_rfclist.dta"
gen transportista=0 
replace transportista=1 if _merge==3
drop if _merge==2
drop _merge
rename rfctransta rfc
keep valorendolares canto  cveunidad preciounitario  rfc fraccionarancelaria cveddocument destination iddest street number_int number_ext cp mun firm transportista destfile small year month
save "${temp}destination-info-pair-`mm'`type'complete2_rfcprice", replace
}

use "${temp}destination-info-pair-01`type'complete2_rfcprice", replace
save "${temp}destination-info-pair-`type'complete2_rfcprice", replace

foreach mm in "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"{
use "${temp}destination-info-pair-`type'complete2_rfcprice", replace
append using "${temp}destination-info-pair-`mm'`type'complete2_rfcprice"
save, replace
}
}


foreach type in "2004" {
foreach mm in "06" "07" "08" "09" "10" "11" "12"{
use "${temp}destination-info-pair-`mm'`type'complete_multi2price", replace
keep if cvepaisorigendestino=="USA"
keep if cvepaiscomp=="USA"
drop if multi==1
drop multi
gen frfc=substr(rfc, 4, 1)
destring frfc, replace force
gen firm=(frfc!=.)
drop frfc
sort rfc
rename rfc rfctransta 
merge m:1 rfctransta using "{output_data}transportista_rfclist.dta"
gen transportista=0 
replace transportista=1 if _merge==3
drop if _merge==2
drop _merge
rename rfctransta rfc
keep valorendolares canto  cveunidad preciounitario  rfc fraccionarancelaria cveddocument destination iddest street number_int number_ext cp mun firm transportista destfile small year month
save "${temp}destination-info-pair-`mm'`type'complete2_rfcprice", replace
}

use "${temp}destination-info-pair-06`type'complete2_rfcprice", replace
save "${temp}destination-info-pair-`type'complete2_rfcprice", replace

foreach mm in  "07" "08" "09" "10" "11" "12"{
use "${temp}destination-info-pair-`type'complete2_rfcprice", replace
append using "${temp}destination-info-pair-`mm'`type'complete2_rfcprice"
save, replace
}
}




local i=`1'
while `i'<=`2'{
foreach type in  "2004" "2005" "2006" "2007"  "2008"  "2009" "2010" "2011"{
use "${temp}destination-info-pair-`type'complete2_rfcprice", replace

gen definite=.
replace definite=1 if cveddocument=="A1" 
replace definite=2 if cveddocument=="RT" 
replace definite=1 if cveddocument=="A1"
replace definite=1 if cveddocument=="G1"
replace definite=1 if cveddocument=="G6"
replace definite=1 if cveddocument=="G7"
replace definite=1 if cveddocument=="L1"
replace definite=1 if cveddocument=="T1"
replace definite=1 if cveddocument=="F8"
replace definite=1 if cveddocument=="F9"

replace definite=2 if cveddocument=="I1"
replace definite=2 if cveddocument=="J1"
replace definite=2 if cveddocument=="J2"
replace definite=2 if cveddocument=="RT"

replace definite=4 if cveddocument=="BA"
replace definite=4 if cveddocument=="BE"
replace definite=4 if cveddocument=="BM"
replace definite=4 if cveddocument=="BO"
replace definite=4 if cveddocument=="J3"
replace definite=4 if cveddocument=="BP"
replace definite=4 if cveddocument=="BR"
replace definite=4 if cveddocument=="J4"

gen maquila=0
replace maquila=1 if definite==2 | definite==4
gen dzero=(frac<10000000)
tostring frac, replace
replace frac="0"+frac if dzero==1
gen hs2=substr(frac, 1, 2)
destring hs2, replace
keep if hs2==`i'
keep rfc maquila valorendolares canto cveunidad precio frac destination iddest street number_ext number_int cp mun firm destfile transportista small year month
save "${temp}destination_us_`type'detail_hs`i'_rfcfracprice.dta", replace
}

local i=`i'+1
}


local i=`1'
while `i'<=`2'{
use "${temp}destination_us_2004detail_hs`i'_rfcfracprice.dta", replace
save "${temp}custom_dest_registry2004to2011_hs`i'_byyear_rfcfracprice", replace

foreach type in "2005" "2006" "2007"  "2008" "2009" "2010" "2011"{
use "${temp}custom_dest_registry2004to2011_hs`i'_byyear_rfcfracprice", replace
append using "${temp}destination_us_`type'detail_hs`i'_rfcfracprice.dta"
save, replace
}

use "${temp}custom_dest_registry2004to2011_hs`i'_byyear_rfcfracprice", replace
sort destination iddest cp street number_ext number_int mun
gen hs=`i'
gen id1=_n
egen id2=group(destination)
save "${temp}custom_dest_registry2004to2011_hs`i'_rfcfracprice", replace
local i=`i'+1
}
   }
end


/* Second part cleans slash "/" "c/o" and "y/o, creating several different names.*/


capture program drop cleanNames
program define cleanNames
   {
     use `1', replace
     display "cleanNames"`3'
	 gen `2'=`3'

     replace `2' = upper(`2')
	replace `2' = trim(`2')
	replace `2'="" if length(`2')==1
replace `2'=subinstr(`2', ".", "",.)
replace `2'=subinstr(`2', ",", " ",.)
replace `2'=subinstr(`2', "  "," ",.)
replace `2'=subinstr(`2', "`"," ",.)
replace `2'=subinstr(`2', "(", "",.)
replace `2'=subinstr(`2', ")", "",.)
     replace `2' = subinstr(`2',"-","",.) 
     replace `2' = subinstr(`2',"'","",.)
	 replace `2' = subinstr(`2',"//","/",.) 
     replace `2' = subinstr(`2',"/"," / ",.)
     replace `2' = subinstr(`2',"&"," & ",.)
	 replace `2' = subinword(`2',"CORP"," ",.)
	 replace `2' = subinword(`2',"CO"," ",.)
	 replace `2' = subinword(`2',"INC"," ",.)
	 replace `2' = subinword(`2',"LTD"," ",.)
	 replace `2' = subinword(`2',"COMPANY"," ",.)
	 replace `2' = subinword(`2',"INC."," ",.)
	 replace `2' = subinword(`2',"LTD."," ",.)
	 replace `2' = subinword(`2',"LLC."," ",.)
	 replace `2' = subinword(`2',"LLC"," ",.)
	 replace `2' = subinword(`2',"LP"," ",.)
	 replace `2' = subinword(`2',"CORP."," ",.)
	 replace `2' = subinword(`2',"CO."," ",.)
	 replace `2' = subinword(`2',"COINC"," ",.)
	 replace `2' = subinword(`2',"COLLC"," ",.)
	 gen incend=(substr(word(`2', -1), -3, .)=="INC")
	 replace  `2' = reverse(`2') if incend==1
	 replace  `2' = subinstr( `2',"CNI"," ",1) if incend==1 & length(word(`2', 1))>4
     replace  `2' = reverse(`2') if incend==1
	 drop incend
	 replace `2' = subinword(`2',"CORPORATION"," ",.)
	 replace `2' = subinword(`2',"&","AND",.)
	 	 replace `2' = subinword(`2',"AND"," ",.)
	 replace `2' = subinword(`2',"CORPORATION."," ",.)
	 replace `2' = subinword(`2',"INCORPORATED"," ",.)
     replace `2' = subinword(`2',"INCORPORATION"," ",.)
     replace `2' = subinword(`2',"INCORPORATE"," ",.)
     replace `2' = subinword(`2',"INCORP"," ",.)
	 replace `2' = subinword(`2',"INTERNATIONALINC","INTERNATIONAL",.)
	 replace `2' = subinword(`2',"BROS","BROTHERS",.)
	 replace `2' = subinword(`2',"PROD","PRODUCTS",.)
	 	 replace `2' = subinword(`2',"PRODS","PRODUCTS",.)
		 	 replace `2' = subinword(`2',"DIST","DISTRIBUTORS",.)
	 replace `2' = subinword(`2',"USAINC","USA",.)
	 replace `2' = subinword(`2',"THE"," ",.)
	 replace `2' = subinword(`2',"HOLDING"," ",.)
	 replace `2' = subinword(`2',"HOLDINGS"," ",.)
	 replace `2' = subinword(`2',"COMPANIES"," ",.)
	 replace `2' = subinword(`2',"LIMITED"," ",.)	
	 	 replace `2' = subinword(`2',"HLDGS"," ",.)	
	 replace `2' = subinword(`2',"INTL","INTERNATIONAL",.)
		 		 replace `2' = subinword(`2',"C / O","C/O",.)
				 replace `2' = subinword(`2',"Y / O","Y/O",.)
				 replace `2' = subinword(`2',"UNITED STATES","US",.)
replace `2'=subinword(`2', "1","One",.)
replace `2'=subinword(`2', "2","TWO",.)
replace `2'=subinword(`2', "3","THREE",.)
replace `2'=subinword(`2', "4","FOUR",.)
replace `2'=subinword(`2', "5","FIVE",.)
replace `2'=subinword(`2', "6","SIX",.)
replace `2'=subinword(`2', "7","SEVEN",.)
replace `2'=subinword(`2', "8","EIGHT",.)
replace `2'=subinword(`2', "9","NINE",.)
replace `2'=subinword(`2', "10","TEN",.)
replace `2'=subinword(`2', "UNO","ONE",.)
replace `2'=subinword(`2', "DOS","TWO",.)
replace `2'=subinword(`2', "TRES","THREE",.)
replace `2'=subinword(`2', "CUATRO","FOUR",.)
replace `2'=subinword(`2', "CINCO","FIVE",.)
replace `2'=subinword(`2', "SEIS","SIX",.)
replace `2'=subinword(`2', "SIETE","SEVEN",.)
replace `2'=subinword(`2', "OCHO","EIGHT",.)
replace `2'=subinword(`2', "NUEVE","NINE",.)
replace `2'=subinword(`2', "DIEZ","TEN",.)
replace `2'=subinstr(`2', "11","ELEVEN",.)
replace `2'=subinstr(`2', "12","TWELVE",.)
replace `2'=subinstr(`2', "13","THIRTEEN",.)
replace `2'=subinstr(`2', "14","FOURTEEN",.)
replace `2'=subinstr(`2', "15","FIFTEEN",.)
replace `2'=subinstr(`2', "16","SIXTEEN",.)
replace `2'=subinstr(`2', "17","SEVETEEN",.)
replace `2'=subinstr(`2', "18","EIGHTEEN",.)
replace `2'=subinstr(`2', "19","NINETEEN",.)
replace `2'=subinstr(`2', "20","TWENTY",.)
replace `2'=subinstr(`2', "21","TWENTY ONE",.)
replace `2'=subinstr(`2', "22","TWENTY TWO",.)
replace `2'=subinstr(`2', "23","TWENTY THREE",.)
replace `2'=subinstr(`2', "24","TWENTY FOUR",.)
replace `2'=subinstr(`2', "25","TWENTY FIVE",.)
replace `2'=subinstr(`2', "26","TWENTY SIX",.)
replace `2'=subinstr(`2', "27","TWENTY SEVEN",.)
replace `2'=subinstr(`2', "28","TWENTY EIGHT",.)
replace `2'=subinstr(`2', "29","TWENTY NINE",.)
replace `2'=subinstr(`2', "30","THIRTY",.)
replace `2'=subinstr(`2', "31","THIRTY ONE",.)
replace `2'=subinstr(`2', "32","THIRTY TWO",.)
replace `2'=subinstr(`2', "33","THIRTY THREE",.)
replace `2'=subinstr(`2', "34","THIRTY FOUR",.)
replace `2'=subinstr(`2', "35","THIRTY FIVE",.)
replace `2'=subinstr(`2', "36","THIRTY SIX",.)
replace `2'=subinstr(`2', "37","THIRTY SEVEN",.)
replace `2'=subinstr(`2', "38","THIRTY EIGHT",.)
replace `2'=subinstr(`2', "39","THIRTY NINE",.)
replace `2'=subinstr(`2', "40","FOURTY",.)
replace `2'=subinstr(`2', "41","FOURTY ONE",.)
replace `2'=subinstr(`2', "42","FOURTY TWO",.)
replace `2'=subinstr(`2', "43","FOURTY THREE",.)
replace `2'=subinstr(`2', "44","FOURTY FOUR",.)
replace `2'=subinstr(`2', "45","FOURTY FIVE",.)
replace `2'=subinstr(`2', "46","FOURTY SIX",.)
replace `2'=subinstr(`2', "47","FOURTY SEVEN",.)
replace `2'=subinstr(`2', "48","FOURTY EIGHT",.)
replace `2'=subinstr(`2', "49","FOURTY NINE",.)
replace `2'=subinstr(`2', "50","FIFTY",.)
replace `2'=subinstr(`2', "51","FIFTY ONE",.)
replace `2'=subinstr(`2', "52","FIFTY TWO",.)
replace `2'=subinstr(`2', "53","FIFTY THREE",.)
replace `2'=subinstr(`2', "54","FIFTY FOUR",.)
replace `2'=subinstr(`2', "55","FIFTY FIVE",.)
replace `2'=subinstr(`2', "56","FIFTY SIX",.)
replace `2'=subinstr(`2', "57","FIFTY SEVEN",.)
replace `2'=subinstr(`2', "58","FIFTY EIGHT",.)
replace `2'=subinstr(`2', "59","FIFTY NINE",.)
replace `2'=subinstr(`2', "60","SIXTY",.)
replace `2'=subinstr(`2', "61","SIXTY ONE",.)
replace `2'=subinstr(`2', "62","SIXTY TWO",.)
replace `2'=subinstr(`2', "63","SIXTY THREE",.)
replace `2'=subinstr(`2', "64","SIXTY FOUR",.)
replace `2'=subinstr(`2', "65","SIXTY FIVE",.)
replace `2'=subinstr(`2', "66","SIXTY SIX",.)
replace `2'=subinstr(`2', "67","SIXTY SEVEN",.)
replace `2'=subinstr(`2', "68","SIXTY EIGHT",.)
replace `2'=subinstr(`2', "69","SIXTY NINE",.)
replace `2'=subinstr(`2', "70","SEVENTY",.)
replace `2'=subinstr(`2', "71","SEVENTY ONE",.)
replace `2'=subinstr(`2', "72","SEVENTY TWO",.)
replace `2'=subinstr(`2', "73","SEVENTY THREE",.)
replace `2'=subinstr(`2', "74","SEVENTY FOUR",.)
replace `2'=subinstr(`2', "75","SEVENTY FIVE",.)
replace `2'=subinstr(`2', "76","SEVENTY SIX",.)
replace `2'=subinstr(`2', "77","SEVENTY SEVEN",.)
replace `2'=subinstr(`2', "78","SEVENTY EIGHT",.)
replace `2'=subinstr(`2', "79","SEVENTY NINE",.)
replace `2'=subinstr(`2', "80","EIGHTY",.)
replace `2'=subinstr(`2', "81","EIGHTY ONE",.)
replace `2'=subinstr(`2', "82","EIGHTY TWO",.)
replace `2'=subinstr(`2', "83","EIGHTY THREE",.)
replace `2'=subinstr(`2', "84","EIGHTY FOUR",.)
replace `2'=subinstr(`2', "85","EIGHTY FIVE",.)
replace `2'=subinstr(`2', "86","EIGHTY SIX",.)
replace `2'=subinstr(`2', "87","EIGHTY SEVEN",.)
replace `2'=subinstr(`2', "88","EIGHTY EIGHT",.)
replace `2'=subinstr(`2', "89","EIGHTY NINE",.)
replace `2'=subinstr(`2', "90","NINETY",.)
replace `2'=subinstr(`2', "91","NINETY ONE",.)
replace `2'=subinstr(`2', "92","NINETY TWO",.)
replace `2'=subinstr(`2', "93","NINETY THREE",.)
replace `2'=subinstr(`2', "94","NINETY FOUR",.)
replace `2'=subinstr(`2', "95","NINETY FIVE",.)
replace `2'=subinstr(`2', "96","NINETY SIX",.)
replace `2'=subinstr(`2', "97","NINETY SEVEN",.)
replace `2'=subinstr(`2', "98","NINETY EIGHT",.)
replace `2'=subinstr(`2', "99","NINETY NINE",.)



drop if index(destination2, "SA DE CV")>0
drop if index(destination2, "S A DE C V")>0
drop if index(destination2, "SDE RL DE CV")>0
drop if index(destination2, "S DE R L DE CV")>0
drop if index(destination2, "S DE RL DE CV")>0
drop if index(destination2, "SDE RLDE CV")>0
drop if index(destination2, "S DE R L DE C V")>0
drop if index(destination2, "SADE CV")>0
drop if index(destination2, "SA / CV")>0
drop if index(destination2, "SACV")>0
drop if index(destination2, "SADE CV")>0
drop if index(destination2, "S A DE CV")>0
drop if index(destination2, "S DE RLDE CV")>0
drop if index(destination2, "S A DE CV")>0
drop if index(destination2, "S DE RL")>0
drop if index(destination2, "SDE RL")>0
drop if index(destination2, "S DE RLDE CV")>0
drop if index(destination2, "DE C V")>0
drop if index(destination2, "S DE  RL DE  CV")>0
drop if index(destination2, "S DE  RL DE CV")>0
drop if index(destination2, "S/RL/CV")>0
drop if index(destination2, "DECV")>0
drop if index(destination2, "SOCIEDAD COOPERATIVA")>0
drop if index(destination2, "S DE PR DE RL")>0
drop if index(destination2, "SPR DE RL")>0
drop if index(destination2, "S RL")>0
drop if index(destination2, "SPR RL")>0
drop if index(destination2, "S PR RL")>0
drop if index(destination2, "S P R  R L")>0
drop if index(destination2, "SRL")>0
drop if index(destination2, "S  P R  R L")>0
drop if index(destination2, "SPR")>0
drop if index(destination2, "SC  RL")>0
drop if index(destination2, "SCL")>0

	replace `2' = trim(`2')
	gen wordnumber=wordcount(destination2)
egen maxwordnumber=max(wordnumber)
gen destination3=""
gen nslash=0
	local j=1
while `j'<=maxwordnumber{
gen dest`j'=word(destination2,`j')
gen slash`j'=(dest`j'=="/"|dest`j'=="C/O"|dest`j'=="Y/O" |dest`j'=="DBA" )
replace destination3=destination3+" "+dest`j'
replace nslash=nslash+slash`j'
drop dest`j'
drop slash`j'
local j=`j'+1
}
replace `2'=destination3
drop destination3
replace `2' = trim(`2')
egen id3=group(`2')
save `5', replace
collapse (sum) valorendolares, by (`2')
capture sort `2'
     save `4', replace
   }
end




/* Clean the destination name*/

capture program drop cleanSlash
program define cleanSlash
   {
local i=`1'
while `i'<=`2'{

cleanNames "${temp}custom_dest_registry2004to2011_hs`i'_rfcfracprice" destination2 destination temp temp2

use temp2, replace
egen maxnslash=max(nslash)
gen destnametemp1=destination2
drop maxwordnumber
gen slash=0

local k=1
while `k'<= maxnslash{
local l=`k'+1
capture gen destnametemp`l'=destnametemp`k' 
capture gen destnameclean`k'=destnametemp`k' if nslash<`k'
capture replace destnameclean`k'=destnametemp`k' if nslash<`k'
capture replace destnametemp`k'="" if nslash<`k'

replace wordnumber=wordcount(destnametemp`k')
egen maxwordnumber=max(wordnumber)
capture gen destnameclean`l'=""

replace slash=0
local j=0
while `j'<=maxwordnumber-1{
local p=maxwordnumber-`j'
replace slash=`p' if word(destnametemp`k',`p')=="C/O"
replace slash=`p' if word(destnametemp`k',`p')=="/"
replace slash=`p' if word(destnametemp`k',`p')=="Y/O"
replace slash=`p' if word(destnametemp`k',`p')=="DBA"
local j=`j'+1
}

local j=1
while `j'<=maxwordnumber{
gen dest`j'_bs=word(destnametemp`k',`j') if `j'<slash
gen dest`j'_as=word(destnametemp`k',`j') if `j'>slash
local j=`j'+1
}

replace destnametemp`k'=""
replace destnametemp`l'=""

local j=1
while `j'<=maxwordnumber{
replace destnametemp`k'=destnametemp`k'+" "+dest`j'_bs if nslash>=`k'
drop dest`j'_bs
replace destnametemp`l'=destnametemp`l'+" "+dest`j'_as if nslash>=`k'
drop dest`j'_as
local j=`j'+1
}

replace destnametemp`k'=trim(destnametemp`k')
replace destnametemp`l'=trim(destnametemp`l')
replace destnameclean`k'=destnametemp`k' if  destnametemp`k'!="" & nslash>=`k'
replace destnameclean`l'=destnametemp`l' if  destnametemp`k'!="" & nslash>=`k'
drop maxwordnumber
local k=`k'+1
}

drop destnametemp*** maxnslash slash wordnumber
save "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_rfcfracprice", replace

egen maxnslash=max(nslash)
gen maxnsep=maxnslash+1

local j=2
while `j'<=maxnsep{
capture keep if destnameclean`j'!=""
capture drop if length(destnameclean`j')==1
capture drop if `j'=="USA"
capture replace destnameclean1=destnameclean`j'
capture save "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_temp`j'_rfcfracprice", replace
local j=`j'+1
}

use "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_rfcfracprice", replace


egen maxnslash=max(nslash)
gen maxnsep2=maxnslash+1

save, replace

local j=2
while `j'<=maxnsep2{
capture use "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_rfcfracprice", replace
capture append using "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_temp`j'_rfcfracprice"
capture save, replace
capture erase "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_temp`j'_rfcfracprice.dta"
local j=`j'+1
}

capture drop destnameclean2-maxnsep
capture gen destnameclean1=destination2
sort destnameclean1
egen id4=group(destnameclean1)
save, replace
local i=`i'+1
}
   }
end



/* Clean zipcode, municipality, and iddest (ein) */

capture program drop loopHSCleanIddest_1
program define loopHSCleanIddest_1
   {
local i=`1'
while `i'<=`2'{
use "${temp}custom_dest_registry2004to2011_hs`i'_slashcleaned_rfcfracprice", replace

/*** cleaning zipcode ***/
gen cp2=cp
replace cp2=subinstr(cp2, "-", "",.)
replace cp2=substr(cp2, 1, 5)
destring cp2, replace force
replace cp2=. if cp2==0
replace cp2=. if cp2==1
rename cp2 zip
sort zip

merge n:1 zip using "${temp}usziplist_zp4.dta"

drop if _merge==2
gen nouszip=(zip!=. & _merge==1)
drop _merge

rename zip cp2

gen cp_9=cp
replace cp_9="" if length(cp_9)!=10

gen zip_complete=cp
replace zip_complete=subinstr(zip_complete, "-", "",.)
replace zip_complete=substr(zip_complete, 1, 5)
replace zip_complete=cp_9 if cp_9!=""
drop cp_9


/*** cleaning street number ***/
gen number_ext2=number_ext
replace number_ext2="" if number_ext2=="0"
replace number_ext2="" if number_ext2=="."
replace number_ext2="" if number_ext2=="-"
replace number_ext2="" if number_ext2=="SN"
replace number_ext2="" if number_ext2=="S N"
replace number_ext2="" if number_ext2=="S/N"
replace number_ext2="" if number_ext2=="SIN NUMERO"
replace number_ext2=trim(number_ext2)

gen number_int2=number_int
replace number_int2="" if number_int2=="0"
replace number_int2="" if number_int2=="."
replace number_int2="" if number_int2=="-"
replace number_int2="" if number_int2=="SN"
replace number_int2="" if number_int2=="S N"
replace number_int2="" if number_int2=="S/N"
replace number_int2=""  if number_int2=="SIN NUMERO"
replace number_int2="" if number_int2==number_ext2
replace number_int2=trim(number_int2)




/*** cleaning iddest(ein) 
creating new id called iddest2 ***/

gen iddest2=iddest
replace iddest2=subinstr(iddest2, " ", "",.)
replace iddest2=subinstr(iddest2, "-", "",.)
replace iddest2=trim(iddest2)

gen length=length(iddest2)
replace iddest2=substr(iddest2, 1, 9) if length==11
replace iddest2="" if length!=9
destring iddest2, gen(iddest3) force	
replace iddest2="" if iddest3==.
replace iddest2="" if iddest3==0 
replace iddest2="" if iddest3==123456789
replace iddest2="" if iddest3==111111111
replace iddest2="" if iddest3==222222222
replace iddest2="" if iddest3==333333333
replace iddest2="" if iddest3==444444444
replace iddest2="" if iddest3==555555555
replace iddest2="" if iddest3==666666666
replace iddest2="" if iddest3==777777777
replace iddest2="" if iddest3==888888888
replace iddest2="" if iddest3==999999999

/*** checking if ein has correct first two digit***/		

foreach name in "07" "08" "09" "17" "18" "19" "28" "29" "49" "69" "70" "78" "79" "89" "96" "97"{
replace iddest2="" if substr(iddest2, 1, 2)=="`name'"
        }
		
/*** checking if zipcode is written wrongly***/		

gen iddest2_5=substr(iddest2, 1, 5)
destring iddest2_5, replace
replace iddest2="" if iddest2_5==cp2


/*** cleaning municipality (taking out state component) ***/

replace mun=upper(mun)
replace mun=subinstr(mun, ",", "",.)
replace mun=subinstr(mun, ".", "",.)
replace mun=subinstr(mun, ";", "",.)
replace mun=subinword(mun, "NUEVA", "NEW",.)
replace mun=trim(mun)
gen lastwmun=word(mun, -1)

foreach name in "AK" "AL" "AR" "AZ" "CA" "CAL" "CO" "CT" "DC" "DE" "FL" "GA" "HI" "IA" "ID" "IL" "IN" "KS" "KY" "LA" "MA"  "MD" "ME" "MI" "MN" "MO" "MS" "MT" "NC" "ND" "NE" "NH" "NJ" "NM" "NV" "NY" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT" "VA" "VT" "WA" "WI" "WV" "WY"  "USA" "EUA"{
replace mun=subinword(mun, "`name'","",.)
replace mun=trim(mun)
        }

		gen state_munname=""
		foreach name in "AK" "AL" "AR" "AZ" "CA" "CO" "CT" "DC" "DE" "FL" "GA" "HI" "IA" "ID" "IL" "IN" "KS" "KY" "LA" "MA"  "MD" "ME" "MI" "MN" "MO" "MS" "MT" "NC" "ND" "NE" "NH" "NJ" "NM" "NV" "NY" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT" "VA" "VT" "WA" "WI" "WV" "WY" {
replace state_munname=lastwmun if lastwmun=="`name'"
replace state_munname=trim(state_munname)
        }
		
		foreach name in "ALASKA" "ALABAMA" "ARKANSAS" "ARIZONA" "CALIFORNIA" "COLORADO" "CONNETICUT" "DC" "DELAWARE" "FLORIDA" "GEORGIA" "HAWAII" "IOWA" "IDAHO" "ILLINOIS" "INDIANA" "KANSAS" "KENTUCKY" "LOUISIANA" "MASSACHUSETTS" "MASSACHUSETS" "MARYLAND" "MAINE" "MICHIGAN" "MINNESOTA" "MISSOURI" "MISSISSIPPI" "MONTANA" "NOTH CAROLINA" "NORTH DAKOTA" "NEBRASKA" "NEW HAMPSHIRE" "NEW JERSEY" "NEW MEXICO" "NEVADA" "NEW YORK" "OHIO" "OKLAHOMA" "OREGON" "PENNSYLVANIA" "RHODE ISLAND" "SOUTH CAROLINA" "SOUTH DAKOTA" "TENNESSEE" "TEXAS" "UTAH" "VIRGINIA" "VERMONT" "WASHINGTON" "WISCONSIN" "WEST VIRGINIA" "WYOMING" "ESTADOS UNIDOS  AMERICA"{
replace mun=subinword(mun, "`name'","",.) if subinword(mun, "`name'","",.)!=""
replace mun=trim(mun)
        }

		foreach name in "ALASKA" "ALABAMA" "ARKANSAS" "ARIZONA" "CALIFORNIA" "COLORADO" "CONNETICUT" "DC" "DELAWARE" "FLORIDA" "GEORGIA" "HAWAII" "IOWA" "IDAHO" "ILLINOIS" "INDIANA" "KANSAS" "KENTUCKY" "LOUISIANA" "MASSACHUSETTS" "MARYLAND" "MAINE" "MICHIGAN" "MINNESOTA" "MISSOURI" "MISSISSIPPI" "MONTANA" "NOTH CAROLINA" "NORTH DAKOTA" "NEBRASKA" "NEW HAMPSHIRE" "NEW JERSEY" "NEW MEXICO" "NEVADA" "NEW YORK" "OHIO" "OKLAHOMA" "OREGON" "PENNSYLVANIA" "RHODE ISLAND" "SOUTH CAROLINA" "SOUTH DAKOTA" "TENNESSEE" "TEXAS" "UTAH" "VIRGINIA" "VERMONT" "WASHINGTON" "WISCONSIN" "WEST VIRGINIA" "WYOMING" {
replace state_munname=lastwmun if lastwmun=="`name'"
replace state_munname=trim(state_munname)
        }

		
gen wmunnum=wordcount(mun)
egen maxwordnumber=max(wmunnum)
		
local j=1
while `j'<=maxwordnumber{
gen mun`j'=word(mun,`j')
replace mun`j'=subinstr(mun`j', "USA", ".",.) if `j'==wmunnum
replace mun`j'=subinstr(mun`j', "TX", ".",.) if `j'==wmunnum
replace mun`j'=subinstr(mun`j', "TEXAS", ".",.) if `j'==wmunnum
replace mun`j'=subinstr(mun`j', "TEX", ".",.) if `j'==wmunnum
destring mun`j', force generate(aaa`j')
replace mun`j'="" if aaa`j'!=.
drop aaa`j'
local j=`j'+1
}
		

replace mun=mun1

local j=2
while `j'<=maxwordnumber{
replace mun=mun+""+mun`j'
local j=`j'+1
}

replace mun=trim(mun)
replace mun=subinstr(mun, ".", "",.)

		
rename cp2 Zipcode
sort Zipcode
merge n:1 Zipcode using "${rawdata}uszipcode_county_wide.dta"
drop if _merge==2
rename state state_zip
rename county1 conty_zip1
rename county2 conty_zip2
rename county3 conty_zip3
rename Zipcode cp2
drop _merge

sort mun
merge n:1 mun using "${rawdata}uszipmun_state_wide.dta"
drop if _merge==2
drop _merge
save "${temp}custom_dest_registry2004to2011_cleaned_hs`i'_rfcfracmaquilaprice", replace

local i=`i'+1
}

   }
end


/*** deleting transactions that are by individuals and by transportation companies***/

capture program drop prepareForZP4_1
program define prepareForZP4_1
   {
local i=`1'
while `i'<=`2'{

use "${temp}custom_dest_registry2004to2011_cleaned_hs`i'_rfcfracmaquilaprice.dta", clear

keep if destnameclean1!=""
keep if firm==1
keep if transportista==0

replace mun=trim(mun)

gen cleanstreet=street
replace cleanstreet=subinstr(cleanstreet, ".", " ",.)
replace cleanstreet=subinstr(cleanstreet, ",", " ",.)
replace cleanstreet=trim(cleanstreet)
replace cleanstreet=subinword(cleanstreet, "P O", "PO", .)

gen firstst=word(cleanstreet, 1)
replace firstst=upper(firstst)
replace firstst=subinstr(firstst, "-", "",.)
replace firstst=trim(firstst)
gen aaa=(firstst==number_ext2)
gen bbb=(firstst==number_int2)
gen po=(index( cleanstreet, "PO BOX")>0 | index( cleanstreet, "PO  BOX")>0 )

gen lastst=word(cleanstreet, -1)
replace lastst=subinstr(lastst, "-", "",.)
replace lastst=trim(lastst)
gen ccc=(lastst==number_ext2)
gen ddd=(lastst==number_int2)

gen streetext=street 
replace streetext=number_ext2+" "+street if aaa==0 & ccc==0 & po==0
replace streetext=streetext+" "+number_int2 if bbb==0 & ddd==0 & po==0
 
replace streetext=subinstr(streetext, ".", " ",.)
replace streetext=subinstr(streetext, ",", " ",.)
replace streetext=trim(streetext)
replace zip_complete="" if zip_complete=="."
replace zip_complete="" if zip_complete=="0"
replace state_munname="TX" if state_munname=="TEXAS"
replace state_zip=state_munname if state_zip==""
keep canto** cveunidad precio valorendolares rfc maquila frac year month destnameclean1 destination2 iddest2 streetext mun cp2 small

gen addressid=_n
sort addressid
gen hs2=`i'
save "${temp}address_beforezp4all_hs`i'_rfcfracmaquilaprice", replace
local i=`i'+1
}
   }
end



getDestinationHS 1 76
getDestinationHS 78 99
cleanSlash 1 76
cleanSlash 78 99

loopHSCleanIddest_1 1 76
loopHSCleanIddest_1 78 99

set more off
prepareForZP4_1 1 76
prepareForZP4_1 78 99


getDestinationHS 50 63
cleanSlash 50 63
loopHSCleanIddest_1 50 63
prepareForZP4_1 50 63


/*************************************************************************
*********************************************************************** 
Merging Back the unified US importer ID for price
***************************************************************************/

forvalues i=50/63{

use ${temp}address_beforezp4all_hs`i'_rfcfracmaquilaprice.dta, replace

rename streetext addressoriginal

merge m:1 addressoriginal cp2 mun using ${output_data}address_beforeafterzp4.dta
drop _merge

keep if hs2==`i'
replace addressfinal=streetext if addressfinal==""
replace cityfinal=mun if cityfinal==""


rename destination2 name
rename addressfinal address
rename cityfinal city
rename zipfinal zip

replace address=upper(address)
replace city=upper(city)

replace cantomer=0 if cantomer==.
replace cantomc=0 if cantomc==.
gen cant=cantomer+cantomc
drop cantom*

sort name address city zip rfc frac addressoriginal cp2 mun month year valorendolares maquila preciounitario
by name address city zip rfc frac addressoriginal cp2 mun month year valorendolares maquila preciounitario: gen a=_n 
keep if a==1
save ${temp}adress_afterzp4_hs`i'_fracmaquilaprice, replace
}


forvalues i=50/63{
use ${rawdata}matched_hs`i'_common.dta, replace
drop _merge
rename f_clid_section newclid2
sort name address city zip newclid2
by name address city zip newclid2: gen a=_n
keep if a==1
by name address city zip: gen b=_N
gen indicator=1 if b>1
egen id_repeat=min(newclid2), by (name address city zip)
replace newclid2=id_repeat if indicator==1
sort name address city zip newclid2
by name address city zip newclid2: gen c=_n
keep if c==1
drop a  b c vol_dollars n_trans earliest latest
merge 1:m name address city zip using ${temp}adress_afterzp4_hs`i'_fracmaquilaprice
drop if _merge==2
save ${temp}rfc_id_merge_hs`i'_fracmaquilaprice, replace
}


**** Generate importer-exporter values by  year at the product level.

use ${temp}rfc_id_merge_hs50_fracmaquilaprice, replace
forvalues i=51/63{
append using ${temp}rfc_id_merge_hs`i'_fracmaquilaprice
}
rename valorendolares value_exporter_importer
rename cant cant_exporter_importer
drop _merge
merge m:1 newclid2 using ${output_data}naics2007_5063_newclid2.dta
drop _merge
save ${temp}exporter_importer_50_63_productlevel_new_maquilaprice, replace



***Generate exports from 501_551_505 by firm-product-maquila exports by month-year


foreach type in "06" "07" "08" "09" "10" "11" "12"{
use ${temp}501_551_`type'2004use, replace

gen definite=.
replace definite=1 if cveddocument=="A1" 
replace definite=2 if cveddocument=="RT" 
replace definite=1 if cveddocument=="A1"
replace definite=1 if cveddocument=="G1"
replace definite=1 if cveddocument=="G6"
replace definite=1 if cveddocument=="G7"
replace definite=1 if cveddocument=="L1"
replace definite=1 if cveddocument=="T1"
replace definite=1 if cveddocument=="F8"
replace definite=1 if cveddocument=="F9"

replace definite=2 if cveddocument=="I1"
replace definite=2 if cveddocument=="J1"
replace definite=2 if cveddocument=="J2"
replace definite=2 if cveddocument=="RT"

replace definite=4 if cveddocument=="BA"
replace definite=4 if cveddocument=="BE"
replace definite=4 if cveddocument=="BM"
replace definite=4 if cveddocument=="BO"
replace definite=4 if cveddocument=="J3"
replace definite=4 if cveddocument=="BP"
replace definite=4 if cveddocument=="BR"
replace definite=4 if cveddocument=="J4"

gen maquila=0
replace maquila=1 if definite==2 | definite==4


gen year=2004
keep if cvepaisorigendestino=="USA"&cvepaiscomprador=="USA"

destring fraccionarancelaria, replace force
gen ind=1 if fraccionarancelaria<10000000
replace ind=. if fraccionarancelaria<1000000
replace ind=1 if fraccionarancelaria<100000
tostring fraccionarancelaria, replace
gen hs2=substr(fraccionarancelaria, 1, 2)
replace hs2=substr(fraccionarancelaria, 1, 1) if ind==1

collapse (sum) valorendolares cantomciaenunidadesdmedidacome, by (rfc hs2 frac year pataduanal numpmento cvesecadua maquila  preciounitario  cveunidad)
gen combination=0

merge m:1 pataduanal numpmento cvesecadua using ${temp}505-`type'2004exp_single

replace combination=1 if _merge==3
drop _merge
destring hs2, replace force
keep if hs2>49&hs2<64
gen value_combination=valorendolares if combination==1
save ${temp}combination_frac_50_63_`type'2004_price, replace

}


forvalues j=5/9{
foreach type in "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"{
use ${temp}501_551_`type'200`j'use, replace

gen year=200`j'
keep if cvepaisorigendestino=="USA"&cvepaiscomprador=="USA"

destring fraccionarancelaria, replace force
gen ind=1 if fraccionarancelaria<10000000
replace ind=. if fraccionarancelaria<1000000
replace ind=1 if fraccionarancelaria<100000
tostring fraccionarancelaria, replace
gen hs2=substr(fraccionarancelaria, 1, 2)
replace hs2=substr(fraccionarancelaria, 1, 1) if ind==1

gen definite=.
replace definite=1 if cveddocument=="A1" 
replace definite=2 if cveddocument=="RT" 
replace definite=1 if cveddocument=="A1"
replace definite=1 if cveddocument=="G1"
replace definite=1 if cveddocument=="G6"
replace definite=1 if cveddocument=="G7"
replace definite=1 if cveddocument=="L1"
replace definite=1 if cveddocument=="T1"
replace definite=1 if cveddocument=="F8"
replace definite=1 if cveddocument=="F9"

replace definite=2 if cveddocument=="I1"
replace definite=2 if cveddocument=="J1"
replace definite=2 if cveddocument=="J2"
replace definite=2 if cveddocument=="RT"

replace definite=4 if cveddocument=="BA"
replace definite=4 if cveddocument=="BE"
replace definite=4 if cveddocument=="BM"
replace definite=4 if cveddocument=="BO"
replace definite=4 if cveddocument=="J3"
replace definite=4 if cveddocument=="BP"
replace definite=4 if cveddocument=="BR"
replace definite=4 if cveddocument=="J4"

gen maquila=0
replace maquila=1 if definite==2 | definite==4

collapse (sum) valorendolares cantomciaenunidadesdmedidacome, by (rfc hs2 frac year pataduanal numpmento cvesecadua maquila preciounitario  cveunidad)

gen combination=0
merge m:1 pataduanal numpmento cvesecadua using ${temp}505-`type'200`j'exp_single

replace combination=1 if _merge==3
drop _merge
destring hs2, replace force
keep if hs2>49&hs2<64
gen value_combination=valorendolares if combination==1
save ${temp}combination_frac_50_63_`type'200`j'_price, replace

}
}

forvalues j=0/1{
foreach type in "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"{
use ${temp}501_551_`type'201`j'use, replace

gen year=201`j'
keep if cvepaisorigendestino=="USA"&cvepaiscomprador=="USA"

destring fraccionarancelaria, replace force
gen ind=1 if fraccionarancelaria<10000000
replace ind=. if fraccionarancelaria<1000000
replace ind=1 if fraccionarancelaria<100000
tostring fraccionarancelaria, replace
gen hs2=substr(fraccionarancelaria, 1, 2)
replace hs2=substr(fraccionarancelaria, 1, 1) if ind==1


gen definite=.
replace definite=1 if cveddocument=="A1" 
replace definite=2 if cveddocument=="RT" 
replace definite=1 if cveddocument=="A1"
replace definite=1 if cveddocument=="G1"
replace definite=1 if cveddocument=="G6"
replace definite=1 if cveddocument=="G7"
replace definite=1 if cveddocument=="L1"
replace definite=1 if cveddocument=="T1"
replace definite=1 if cveddocument=="F8"
replace definite=1 if cveddocument=="F9"

replace definite=2 if cveddocument=="I1"
replace definite=2 if cveddocument=="J1"
replace definite=2 if cveddocument=="J2"
replace definite=2 if cveddocument=="RT"

replace definite=4 if cveddocument=="BA"
replace definite=4 if cveddocument=="BE"
replace definite=4 if cveddocument=="BM"
replace definite=4 if cveddocument=="BO"
replace definite=4 if cveddocument=="J3"
replace definite=4 if cveddocument=="BP"
replace definite=4 if cveddocument=="BR"
replace definite=4 if cveddocument=="J4"

gen maquila=0
replace maquila=1 if definite==2 | definite==4

collapse (sum) valorendolares cantom, by (rfc hs2 frac year pataduanal numpmento cvesecadua maquila  preciounitario  cveunidad)

gen combination=0
merge m:1 pataduanal numpmento cvesecadua using ${id}505-`type'201`j'exp_single
replace combination=1 if _merge==3
drop _merge
destring hs2, replace force
keep if hs2>49&hs2<64
gen value_combination=valorendolares if combination==1
save ${temp}combination_frac_50_63_`type'201`j'_price, replace
}
}

foreach type in "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"{
use ${temp}combination_frac_50_63_`type'2011_price, replace
rename cantomercanciaunidadesmedida cantomciaenunidadesdmedidacome
save ${temp}combination_frac_50_63_`type'2011_price, replace
}

*** dropping individuals and transport companies

use ${temp}combination_frac_50_63_062004_price, replace
gen month=6
foreach type in "07" "08" "09" "10" "11" "12"{
append using ${temp}combination_frac_50_63_`type'2004_price
replace month=`type' if month==.
}
forvalues j=5/9{
foreach type in "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"{
append using ${temp}combination_frac_50_63_`type'200`j'_price
replace month=`type' if month==.
}
}
forvalues j=0/1{
foreach type in "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"{
append using ${temp}combination_frac_50_63_`type'201`j'_price
replace month=`type' if month==.
}
}

drop if rfc==""
replace rfc=subinstr(rfc," ","",.)
gen frfc=substr(rfc, 4, 1)
destring frfc, replace force
gen individual=1 if frfc==.
replace individual=1 if rfc=="EXTR920901TS4"
replace individual=1 if rfc=="AADC930401KX7"
replace individual=1 if rfc=="ESTU930401QZ7"
replace individual=1 if rfc=="ESTU930401QZ7"
replace individual=1 if rfc=="EJID930401SJ5"
replace individual=1 if rfc=="EMB930401KH4"
replace individual=1 if rfc=="OIN9304013N0"
replace individual=1 if rfc=="EDM930614781"

drop if individual==1

rename rfc rfctransta
sort rfctransta
merge m:1 rfctransta using "${output_data}transportista_rfclist.dta"
drop if _merge==3
drop _merge
rename rfctransta rfc
collapse aggexport_0 aggexport_1 (sum)value_combination valorendolares cantomciaenunidadesdmedidacome, by(frac hs2 rfc year maquila preciounitario cveunidad month)
gen bbb=valorendolares*maquila
gen ccc=value_combination*maquila
bys rfc frac year: egen maquilavalue=sum(bbb)
bys rfc frac year: egen maquila_comb_value=sum(ccc)
duplicates drop
drop if month<6
save ${temp}501_551_505_frac_50_63_junedecember_maquila_price, replace


*** Merge importer information and keep exporters for which we know partners for more than 80% of its trade. 
use ${temp}501_551_505_frac_50_63_junedecember_maquila_price, replace
destring hs2, replace force

bys year: egen aggvalue_maquila_1=sum(maquilavalue)
merge 1:m rfc hs2 year month frac maquila cveunidad precio using ${temp}exporter_importer_50_63_productlevel_new_junedecember_maquilaprice
gen hs6=substr(frac, 1,6)
drop if _merge==2
gen both1=1 if _merge==3
replace both1=0 if both1==.
keep if both!=0
drop both1

bys rfc hs2 year frac maquila: gen exporter_maquila_count=(_n==1)
gen value_exporter_maquila_temp=valorendolares if exporter_maquila_count==1 
replace value_exporter_maquila_temp=0 if exporter_maquila_count==0 /*avoid double counting*/
egen value_exporter=total(value_exporter_maquila_temp), by(rfc hs2 frac year) /*summing maquila and non-maquila**/
bys rfc hs2 year frac: gen exporter_count=_n
replace exporter_count=0 if exporter_count!=1 
gen value_exporter_temp=value_exporter
replace value_exporter_temp=0 if exporter_count!=1
gen maquilavalue_temp=maquilavalue
replace maquilavalue_temp=0 if exporter_count!=1
collapse  (sum) value_exporter_temp maquilavalue_temp value_exporter_importer canto, by(rfc hs6 year newclid2 maquila)
bys rfc hs6 year: egen valorendolares=sum(value_exporter_temp)
bys rfc hs6 year: egen maquilavalue=sum(maquilavalue_temp)
bys rfc hs6 year: egen knownvalue=sum(value_exporter_importer)
gen knownratio=knownvalue/valorendolares
gen known=0
replace known=1 if knownratio>=0.8

save ${temp}matched_exp_imp_hs6_junedecember_maquilaprice, replace

****Create restricted data with price and maquila
use ${temp}matched_exp_imp_hs6_junedecember_maquilaprice, replace

***Drop observations if we have less than two exporters by hs6 product
bys year hs6 rfc: gen exporter=_n
replace exporter=0 if exporter>1
egen exporter_number=total(exporter), by(year hs6)
replace exporter_number=. if year!=2004
egen exporter_num=min(exporter_number), by( hs6)
drop if exporter_num<2

***Drop observations if we have less than two importers by hs6 product
bys year hs6 newclid2: gen importer=_n
replace importer=0 if importer>1
egen importer_number=total(importer), by(year hs6)
replace importer_number=. if year!=2004
egen importer_num=min(importer_number), by( hs6)
drop if importer_num<2

save ${temp}matched_exp_imp_hs6_junedecember_maquilaprice_restrict, replace
